{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Merging of the different MIMIC data sources\n",
    "\n",
    "##### This file takes as inputs : \n",
    "\n",
    "-LAB_processed (from notebook LabEvents) with the pre-selected and cleaned lab measurements of the patients\n",
    "\n",
    "-INPUTS_processed (from notebook Admissions) with the pre-selected and cleaned inputs to the patients\n",
    "\n",
    "-Admissions_processed (from the notebook Admissions) with the death label of the patients\n",
    "\n",
    "-Diagnoses_ICD with the ICD9 codes of each patient.\n",
    "\n",
    "##### This notebook outputs :\n",
    "\n",
    "-death_tags.csv. A dataframe with the patient id and the corresponding death label\n",
    "\n",
    "-complete_tensor_csv. A dataframe containing all the measurments in tensor version.\n",
    "\n",
    "-complete_tensor_train.csv. A dataframe containing all the training measurments in tensor version.\n",
    "\n",
    "-complete_tensor_val.csv. A dataframe containing all the validation measurments in tensor version.\n",
    "\n",
    "-complete_covariates.csv. A dataframe with the ICD9 covariates codes (binary) of each patient index."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import datetime\n",
    "from datetime import timedelta\n",
    "import numpy as np\n",
    "\n",
    "file_path=\"~/Documents/Data/Full_MIMIC/\"\n",
    "outfile_path=\"~/Documents/Data/Full_MIMIC/Clean_data/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/XXXX/miniconda3/envs/pytorch/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2728: DtypeWarning: Columns (6,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n",
      "/Users/XXXX/miniconda3/envs/pytorch/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2728: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n",
      "/Users/XXXX/miniconda3/envs/pytorch/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2728: DtypeWarning: Columns (12,17) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "lab_df=pd.read_csv(file_path+\"LAB_processed.csv\")[[\"SUBJECT_ID\",\"HADM_ID\",\"CHARTTIME\",\"VALUENUM\",\"LABEL\"]]\n",
    "inputs_df=pd.read_csv(file_path+\"INPUTS_processed.csv\")[[\"SUBJECT_ID\",\"HADM_ID\",\"CHARTTIME\",\"AMOUNT\",\"LABEL\"]]\n",
    "outputs_df=pd.read_csv(file_path+\"OUTPUTS_processed.csv\")[[\"SUBJECT_ID\",\"HADM_ID\",\"CHARTTIME\",\"VALUE\",\"LABEL\"]]\n",
    "presc_df=pd.read_csv(file_path+\"PRESCRIPTIONS_processed.csv\")[[\"SUBJECT_ID\",\"HADM_ID\",\"CHARTTIME\",\"DOSE_VAL_RX\",\"DRUG\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Process names of columns to have the same everywhere.\n",
    "\n",
    "#Change the name of amount. Valuenum for every table\n",
    "inputs_df[\"VALUENUM\"]=inputs_df[\"AMOUNT\"]\n",
    "inputs_df.head()\n",
    "inputs_df=inputs_df.drop(columns=[\"AMOUNT\"]).copy()\n",
    "\n",
    "#Change the name of amount. Valuenum for every table\n",
    "outputs_df[\"VALUENUM\"]=outputs_df[\"VALUE\"]\n",
    "outputs_df=outputs_df.drop(columns=[\"VALUE\"]).copy()\n",
    "\n",
    "#Change the name of amount. Valuenum for every table\n",
    "presc_df[\"VALUENUM\"]=presc_df[\"DOSE_VAL_RX\"]\n",
    "presc_df=presc_df.drop(columns=[\"DOSE_VAL_RX\"]).copy()\n",
    "presc_df[\"LABEL\"]=presc_df[\"DRUG\"]\n",
    "presc_df=presc_df.drop(columns=[\"DRUG\"]).copy()\n",
    "\n",
    "\n",
    "#Tag to distinguish between lab and inputs events\n",
    "inputs_df[\"Origin\"]=\"Inputs\"\n",
    "lab_df[\"Origin\"]=\"Lab\"\n",
    "outputs_df[\"Origin\"]=\"Outputs\"\n",
    "presc_df[\"Origin\"]=\"Prescriptions\"\n",
    "\n",
    "\n",
    "#merge both dfs.\n",
    "merged_df1=(inputs_df.append(lab_df)).reset_index()\n",
    "merged_df2=(merged_df1.append(outputs_df)).reset_index()\n",
    "merged_df2.drop(columns=\"level_0\",inplace=True)\n",
    "merged_df=(merged_df2.append(presc_df)).reset_index()\n",
    "\n",
    "#merged_df=lab_df.reset_index()\n",
    "\n",
    "#Check that all labels have different names.\n",
    "assert(merged_df[\"LABEL\"].nunique()==(inputs_df[\"LABEL\"].nunique()+lab_df[\"LABEL\"].nunique()+outputs_df[\"LABEL\"].nunique()+presc_df[\"LABEL\"].nunique()))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>level_0</th>\n",
       "      <th>CHARTTIME</th>\n",
       "      <th>HADM_ID</th>\n",
       "      <th>LABEL</th>\n",
       "      <th>Origin</th>\n",
       "      <th>SUBJECT_ID</th>\n",
       "      <th>VALUENUM</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2133-02-05 06:29:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2133-02-05 06:59:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2133-02-05 07:29:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2133-02-05 07:59:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2133-02-05 08:29:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   level_0            CHARTTIME   HADM_ID               LABEL  Origin  \\\n",
       "0        0  2133-02-05 06:29:00  139787.0  Potassium Chloride  Inputs   \n",
       "1        1  2133-02-05 06:59:00  139787.0  Potassium Chloride  Inputs   \n",
       "2        2  2133-02-05 07:29:00  139787.0  Potassium Chloride  Inputs   \n",
       "3        3  2133-02-05 07:59:00  139787.0  Potassium Chloride  Inputs   \n",
       "4        4  2133-02-05 08:29:00  139787.0  Potassium Chloride  Inputs   \n",
       "\n",
       "   SUBJECT_ID  VALUENUM  index  \n",
       "0       27063  1.354906    0.0  \n",
       "1       27063  1.354906    1.0  \n",
       "2       27063  1.354906    2.0  \n",
       "3       27063  1.354906    3.0  \n",
       "4       27063  1.354906    4.0  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Set the reference time as the lowest chart time for each admission.\n",
    "merged_df['CHARTTIME']=pd.to_datetime(merged_df[\"CHARTTIME\"], format='%Y-%m-%d %H:%M:%S')\n",
    "ref_time=merged_df.groupby(\"HADM_ID\")[\"CHARTTIME\"].min()\n",
    "\n",
    "merged_df_1=pd.merge(ref_time.to_frame(name=\"REF_TIME\"),merged_df,left_index=True,right_on=\"HADM_ID\")\n",
    "merged_df_1[\"TIME_STAMP\"]=merged_df_1[\"CHARTTIME\"]-merged_df_1[\"REF_TIME\"]\n",
    "assert(len(merged_df_1.loc[merged_df_1[\"TIME_STAMP\"]<timedelta(hours=0)].index)==0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Create a label code (int) for the labels.\n",
    "label_dict=dict(zip(list(merged_df_1[\"LABEL\"].unique()),range(len(list(merged_df_1[\"LABEL\"].unique())))))\n",
    "merged_df_1[\"LABEL_CODE\"]=merged_df_1[\"LABEL\"].map(label_dict)\n",
    "\n",
    "merged_df_short=merged_df_1[[\"HADM_ID\",\"VALUENUM\",\"TIME_STAMP\",\"LABEL_CODE\",\"Origin\"]]\n",
    "\n",
    "#To do : store the label dictionnary in a csv file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_dict_df=pd.Series(merged_df_1[\"LABEL\"].unique()).reset_index()\n",
    "label_dict_df.columns=[\"index\",\"LABEL\"]\n",
    "label_dict_df[\"LABEL_CODE\"]=label_dict_df[\"LABEL\"].map(label_dict)\n",
    "label_dict_df.drop(columns=[\"index\"],inplace=True)\n",
    "label_dict_df.to_csv(outfile_path+\"label_dict.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Time binning of the data\n",
    "First we select the data up to a certain time limit (48 hours)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of patients considered :23457\n"
     ]
    }
   ],
   "source": [
    "#Now only select values within 48 hours.\n",
    "merged_df_short=merged_df_short.loc[(merged_df_short[\"TIME_STAMP\"]<timedelta(hours=48))]\n",
    "print(\"Number of patients considered :\"+str(merged_df_short[\"HADM_ID\"].nunique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XucnGV99/HPd2Z29pTNORxy5gwBIWgIWhGRog0qYC0qeAK1ovVB26q1+NSXUrRVS63UpzwKUkWqyIMHFCiKiKJiERLOJAiEEEgIJJtzdjd7/j1/3PeGyWR2d3azk8nuft+v17525j7+rpl75jf3dd33dSkiMDMzG0im2gGYmdn+z8nCzMwG5WRhZmaDcrIwM7NBOVmYmdmgnCzMzGxQThY2IpT4tqQtku4rMf9CSXcPsP7PJF0wAnG8S9IvhrnugGWopJEq/zD2+wVJGyW9WObyl0r67gjt+1pJXxhgfoukQ0dgP8M+JsrY9lGSHpS0Q9LHKrGP/YWTRQFJqyXtTA/S9ekXx4Rqx1UojfGMasdRwinA64HZEbF4qCtHxJkR8R0YPLEMsp3vRcQbhrMue1mGcpX6wi0s/74iaQ7wCWBBRBxUYv5pktbuy5gKRcSEiFg1AtvZm2NiMJ8C7oqIpoj42nA3Mlji3B84WezprIiYALwcOAn4zFA3ICk34lHt/+YBqyOitdqB7IWxUIahmAdsiogN1Q5kFJsHLK92EJKyFd9JRPgv/QNWA2cUPL8cuDV9PAn4T+AF4HngC0A2nXch8Hvgq8Bm4Avp9A8CjwM7gBXAy9PpM4EfAc3AM8DHCvZ5KXAjcF263nJgUTrvv4BeYCfQAnwqnf4D4EVgG/Bb4NiC7U0DbgG2A0vTuO8umH80cEca9xPA2wd4fWYCN6fLrgQ+mE7/ANAO9KRx/WOJdS8E7gb+FdiSlvvMgvl3AX8JHFO0ra3p/Demr+GO9PX/ZD8xXlhUvgA+DDyV7vdKQCXW26MMxdsq2N7h6eNr0+39dxrXvcBhBcseW/Dargf+N7AE6AS60v08XFj+9HGG5EfKs8CG9FiYlM6bn8ZwAfAcsBH4hwHes0np+s3p9j6Tbv+M9DjqTeO4tmi9xqL5Len7fyn9HJ+DHdslYrsW+Eb6Gu0AfgPMG+Zr3e/7PJRjAsgCX0lf12eAi9PlcyXi/1V6vLSnr8+RwJuAB0k+b2uAS4vWOQX4H2BrOv9C4KL0eOhMt3NLuuwx6XGxNX2dzy567b4O3Aa0UvC9VbHvx0rvYDT9UZAsgDnpG/T59PlPgKvSD9EBwH3AhwoOxm7go0AOqAfeRvKldhIg4HCSXyEZ4H7gs0AeOBRYBfxZuq1L04PvjemB+0XgD6ViLJj2fqAJqAWuAB4qmHdD+tcALEgP0LvTeY3p8/elcb88/ZAc28/r8xvg/wJ1wEKSL4Q/LfWBLLHuhekH4oNpuf4KWMdLH9K7eOnLco9tkSTp16SPp5Am3n72U/zFcCswGZibxrykzHVLxVH8BbYZWJy+ft8DbkjnNaUxfyJ9vZqAkwve4+8Wbbew/O8nScaHAhOAHwP/lc6bn8bwTZLj7ASgAzimnzJdB/w03f984EngA+m804C1A7xne8xngOOTQY7tEtu/luSL/1SSY/ffS7x3g77Wg73PQzkmSJLICmA2yXH2S/pJFsXvW8Fr9rL0tTie5EfCW9J5c9Pyng/UkPyQW1hQvi8UbKcmPQb+d/panp6ue1TB8tuAV6f7qqv492OldzCa/ki+iFtIMvmzJF+M9cCB6QeyvmDZ84FfFxyMzxVt63bgr0vs4+QSy34a+Hb6+FLglwXzFgA7i2Ls91dE+gEIkl+UWZIv6KMK5u86swDeAfyuaP2rgM+V2O4ckl9RTQXTvkj6i7T4A1li/QuBlQXPG9I4D0qf7/rQldoWya/oDwETB3kPd1s33ccpBc9vBC4pc91ScRR/gV1TMO+NwB8Ljo8H+9nPpQycLO4EPlIw76j0fczxUrKYXTD/PuC8EvvJkhy3CwqmfYikjh2GnyxKHp8McmyX2P617P6FPyE9xuYM5bUe7H0eyjFBcrbwoYJ5ZzCEZFFi/hXAVwtei5sGeC0Kk8VrSGoLMgXTvk96ppIuf91An4WR/huPdeuDeUtE/LJwgqSXkWT6FyT1Tc6Q/CrvU/gYki/Xp0tsfx4wU9LWgmlZ4HcFzwuvTGkD6iTlIqK7eGNpXeU/kZzJzCCpNgCYTpLocgPEOQ84uSiWHEl1V7GZwOaI2FEw7VlgUYll+7OrXBHRlr6W5V5A8BckVShfkvQIyYf7nqHul+T1HMmLFvrbdn/vfzlmkry2fZ4leV8OLGO/haaT/Cot3tasYcbV377r0na6co7tYruOx4hokbSZpPzFn6dS+y0u81De5/6WLd53qTj6Jelk4EvAcSSvfS1JNTEM7ZiYCayJiN6CacXv3ZBi21tOFuVZQ/ILbXqpL+xUlFjnsH629UxEHDHMWIr3807gHJJfQKtJzii2kFR9NZNUj80mqX6A5IAtjOU3EfH6Mva7DpgqqakgYcwlqWobacVlJCKWAudIqiGpR76R3ctSCa0kZ0AASNrjiqEBrCE5uyhlj/IVWUfyxdtnLsn7uJ7kvSzXRpIzknkkVSt92yr3PRsszmLDObZ3vYfplYdTScpfLS+w+2s81GPseuA/SNrj2iVdQZK0IXl9+rvKrvi1XgfMkZQpSBhzeelzXGqdivLVUGWIiBeAXwBfkTRRUkbSYZJeO8Bq1wCflPSK9Pr9wyXNI6ky2C7p7yXVS8pKOk7SSWWGs56kLrhPE0ki20TyxfbPBXH3kNR3XyqpQdLRwHsL1r0VOFLSeyTVpH8nSTqmxGuwhqRh7ouS6iQdT9Io/L0y4x6K9cBsSXkASfn0WvlJEdFF0njYU4H9FnsYOFbSQkl1JFUw5boVOEjS30iqldSU/uqEpHzzJfX3+fs+8LeSDkm/QP8Z+H8D/FApKX3/bwT+Kd3/PODjQLn3SawHpkmaVObywzm23yjplPS9/jxwb3qsVcuNwF9LmiVpMvD3Q1y/ieQMvF3SYpIfc32+B5wh6e2ScpKmSVqYziv+XN9L8mPlU+nn8jTgLJL2x6pwsijfe0lOK1eQ/HL/IXBwfwtHxA9IqoeuJ2mY+gkwNf0An0XSQPwMya+/a0jOCMrxReAzkrZK+iRJA+azJL8WVwB/KFr+4nTbL5JUL32fJLmQniG8ATiP5JfMi8CXSU6dSzmfpM58HXATSdvGHWXGPRS/Irm44EVJG9Np7wFWS9pO0gj57grsdzcR8SRwGUkj51MkV3OVu+4Okns2ziJ5XZ8CXpfO7quW2CTpgRKrf4vkvfotyTHSTnLxxHB8lORLZxVJ/Nen2x9URPyR5HhZlR5vMwdZfjjH9vXA50gar18BvKuc2CromyQ/DB8huarpNpKzunJ/nHwEuEzSDpKG/hv7ZkTEcyRtLZ8gKe9DJBcoQHKl5YL0df5JRHQCZwNnkryO/xd4b/qeVEXflSg2Tkj6Mkmj8gXVjsVsfyfpTOAbETFv0IXHOJ9ZjHGSjpZ0fFoVtpik6uimasdltj9Kq8/emFYTzSI56/HnBSeL8aCJpN2ileSU+Csk192b2Z5EckPmFpJqqMdJqpPGPVdDmZnZoHxmYWZmgxoz91lMnz495s+fX+0wzMxGlfvvv39jRMwYbLkxkyzmz5/PsmXLqh2GmdmoIunZwZdyNZSZmZXBycLMzAblZGFmZoNysjAzs0FVNFlIWiLpCUkrJV1SYv7HJa2Q9IikO9OOzkg7brtH0vJ03jsqGaeZmQ2sYskiHWfhSpKOsBYA50taULTYgyRDMh5P0jHfv6TT20g6zTqWZBjKK9IeIM3MrAoqeWaxmGRktFVpD4o3kIy7sEtE/Doi2tKnfyDtRz4inoyIp9LH60jGIR70OmAzM6uMSiaLWew+ktNaBh6h6wPAz4onpp3f5SkxwpSkiyQtk7Ssubl5WEG2dHTzb3c8yUNrtg6+sJnZOFXJZKES00p2RCXp3STDc15eNP1gkn7931c0vGCysYirI2JRRCyaMWN4Jx5d3b187c6nePC5LcNa38xsPKjkHdxr2X1IwtmUGC5R0hnAPwCvjYiOgukTgf8GPhMRxQP6jJj6fBaAts59MfCamdnoVMkzi6XAEenQkHmS0dhuLlxA0onAVcDZEbGhYHqepA/569IR5yqmNpdBgp1OFmZm/apYskjHC74YuJ2kT/gbI2K5pMsknZ0udjkwAfiBpIck9SWTtwOnAhem0x8qGKt2REmioSbLzi4nCzOz/lS0I8GIuI1kDNvCaZ8teHxGP+t9l/IHld9r9fmcq6HMzAbgO7iB+nyGnZ3d1Q7DzGy/5WQBNNT4zMLMbCBOFiRXRLnNwsysf04WQEM+66uhzMwG4GQB1NdkXQ1lZjYAJwtcDWVmNhgnC1wNZWY2GCcLoCGfo82XzpqZ9cvJAqjzHdxmZgNysiCphurqCbp69ujY1szMcLIAkmQB+OzCzKwfTha81E25G7nNzEpzsiC5zwI8poWZWX+cLCiohnKyMDMrycmCpItygJ1dvnzWzKwUJwteOrNwNZSZWWlOFrjNwsxsME4WvHQ1VLsvnTUzK8nJAldDmZkNpqLJQtISSU9IWinpkhLzPy5phaRHJN0paV7BvAskPZX+XVDJOBtqkgZuJwszs9IqliwkZYErgTOBBcD5khYULfYgsCgijgd+CPxLuu5U4HPAycBi4HOSplQq1rp88jJ4HG4zs9IqeWaxGFgZEasiohO4ATincIGI+HVEtKVP/wDMTh//GXBHRGyOiC3AHcCSSgWaz2bIZuTuPszM+lHJZDELWFPwfG06rT8fAH42lHUlXSRpmaRlzc3Nww5UEg0eLc/MrF+VTBYqMS1KLii9G1gEXD6UdSPi6ohYFBGLZsyYMexAIR0tz8nCzKykSiaLtcCcguezgXXFC0k6A/gH4OyI6BjKuiOpPu8zCzOz/lQyWSwFjpB0iKQ8cB5wc+ECkk4EriJJFBsKZt0OvEHSlLRh+w3ptIqp9wBIZmb9ylVqwxHRLeliki/5LPCtiFgu6TJgWUTcTFLtNAH4gSSA5yLi7IjYLOnzJAkH4LKI2FypWMHjcJuZDaRiyQIgIm4Dbiua9tmCx2cMsO63gG9VLrrdeRxuM7P++Q7uVJ2vhjIz65eTRaohn3XfUGZm/XCySDX4aigzs345WaR8n4WZWf+cLFL1NVnaunqIKHnfoJnZuOZkkWrIZ+npDbp6nCzMzIo5WaR2jcPtqigzsz04WaR2DYDU5XstzMyKOVmkPA63mVn/nCxSfeNwuxrKzGxPThapvmoodyZoZrYnJ4vUrjYLn1mYme3BySJVV9NXDeUGbjOzYk4WqYa+S2ddDWVmtgcni5SroczM+udkkfLVUGZm/XOySPk+CzOz/jlZpGqyGWqycpuFmVkJThYF6mvcTbmZWSkVTRaSlkh6QtJKSZeUmH+qpAckdUs6t2jev0haLulxSV+TpErGCh6H28ysPxVLFpKywJXAmcAC4HxJC4oWew64ELi+aN0/AV4NHA8cB5wEvLZSsfap92h5ZmYl5Sq47cXAyohYBSDpBuAcYEXfAhGxOp3XW7RuAHVAHhBQA6yvYKxAUg3lcbjNzPZUyWqoWcCagudr02mDioh7gF8DL6R/t0fE48XLSbpI0jJJy5qbm/c6YI/DbWZWWiWTRak2hrKGoZN0OHAMMJskwZwu6dQ9NhZxdUQsiohFM2bM2KtgwdVQZmb9qWSyWAvMKXg+G1hX5rp/DvwhIloiogX4GfDKEY5vD74aysystEomi6XAEZIOkZQHzgNuLnPd54DXSspJqiFp3N6jGmqkNeSzvs/CzKyEiiWLiOgGLgZuJ/mivzEilku6TNLZAJJOkrQWeBtwlaTl6eo/BJ4GHgUeBh6OiFsqFWuf+nzO1VBmZiVU8mooIuI24LaiaZ8teLyUpHqqeL0e4EOVjK2UhnzWXZSbmZXgO7gL1NdkaevqIaKsdngzs3HDyaJAfT5LBHR0F9/2YWY2vjlZFGhwN+VmZiU5WRTYNQCSr4gyM9uNk0UBj8NtZlaak0WBXeNwd7rNwsyskJNFgZfG4faZhZlZISeLAvVuszAzK8nJokB9ja+GMjMrxcmigC+dNTMrzcmigKuhzMxKc7Io8NLVUG7gNjMr5GRRoK/Nwj3PmpntzsmiQDYj8rmMx7QwMyviZFEk6abcycLMrJCTRZGGGo/DbWZWzMmiSJ3PLMzM9uBkUcTjcJuZ7WlIyUJSRtLESgWzP2ioyblvKDOzIoMmC0nXS5ooqRFYATwh6e/K2bikJZKekLRS0iUl5p8q6QFJ3ZLOLZo3V9IvJD0uaYWk+eUVae/UuxrKzGwP5ZxZLIiI7cBbgNuAucB7BltJUha4EjgTWACcL2lB0WLPARcC15fYxHXA5RFxDLAY2FBGrHut3g3cZmZ7KCdZ1EiqIUkWP42IrjK3vRhYGRGrIqITuAE4p3CBiFgdEY8Auw0gkSaVXETckS7XEhFtZe53r7jNwsxsT+Uki6uA1UAj8FtJ84BtZaw3C1hT8HxtOq0cRwJbJf1Y0oOSLk/PVHYj6SJJyyQta25uLnPTA3M1lJnZnspJFrdExKyIeGNEBEnV0fvLWE8lpkWZceWA1wCfBE4CDiWprtp9YxFXR8SiiFg0Y8aMMjc9sIa8q6HMzIqVkyx+VPgkTRg3lLHeWmBOwfPZwLoy41oLPJhWYXUDPwFeXua6e6W+JqmG6u0tN6+ZmY19uf5mSDoaOBaYJOmtBbMmAnVlbHspcISkQ4DngfOAd5YZ11JgiqQZEdEMnA4sK3PdvVKf9jzb0d27q8tyM7Pxrt9kARwFvBmYDJxVMH0H8MHBNhwR3ZIuBm4HssC3ImK5pMuAZRFxs6STgJuAKcBZkv4xIo6NiB5JnwTulCTgfuCbwyngUBWOw+1kYWaW6DdZRMRPgZ9KelVE3DOcjUfEbSSX2xZO+2zB46Uk1VOl1r0DOH44+90buwZA6uxh2r7euZnZfmqgaqhPRcS/AO+UdH7x/Ij4WEUjq5Jd43D78lkzs10GqoZ6PP2/T9oK9hceh9vMbE8DVUPdkv7/zr4Lp/oKq6HMzCwx0JkFAJKOJLnfYX7h8hFxeuXCqp5d43B3uTNBM7M+gyYL4AfAN4BrgDH/c3tXm0Vn7yBLmpmNH+Uki+6I+HrFI9lPFF46a2ZmiYGuhpqaPrxF0kdI7ofo6JsfEZsrHFtV9LVZ+GooM7OXDHRmcT9JX059fTwVjmERJP01jTkNbuA2M9vDQFdDHbIvA9lf1OV86ayZWTGPwV0kkxF1NRlXQ5mZFXCyKKEh73G4zcwK9ZssJL06/V+778LZP3hoVTOz3Q10ZvG19P+wOhEczerzWdpdDWVmtstAV0N1Sfo2MEvS14pnjtWOBMGj5ZmZFRsoWbwZOINk4KH79004+wdXQ5mZ7W6gS2c3AjdIejwiHt6HMVVdQz7LxpbOaodhZrbfKOdqqE2SbpK0QdJ6ST+SVHLAorGiPp/1pbNmZgXKSRbfBm4GZgKzgFvSaWNWfU3ON+WZmRUoJ1kcEBHfjoju9O9aYEaF46qqpIHb91mYmfUpJ1k0S3q3pGz6925gUzkbl7RE0hOSVkq6pMT8UyU9IKlb0rkl5k+U9Lyk/yhnfyPFV0OZme2unGTxfuDtwIvAC8C56bQBScoCVwJnAguA8yUtKFrsOeBC4Pp+NvN54DdlxDii6mqydHT30tsb+3rXZmb7pUHHs4iI54Czh7HtxcDKiFgFIOkG4BxgRcG2V6fz9hhpSNIrgAOBnwOLhrH/YWso6Ka8sbacIT/MzMa2SvYNNQtYU/B8bTptUJIywFfYvVv0UstdJGmZpGXNzc3DDrSYuyk3M9tdJZOFSkwrt17nI8BtEbFmoIUi4uqIWBQRi2bMGLk29/q+cbidLMzMgPKGVR2utcCcguezgXVlrvsq4DXpCH0TgLyklojYo5G8EnaNw+17LczMgCGcWUh6paRfSfq9pLeUscpS4AhJh0jKA+eR3K8xqIh4V0TMjYj5wCeB6/ZVogCPw21mVmygLsoPKpr0cZKG7iUkVykNKCK6gYuB24HHgRsjYrmkyySdne7jJElrgbcBV0laPrxijKxd43C7GsrMDBi4Guobku4HLo+IdmAr8E6gF9hezsYj4jbgtqJpny14vJSkemqgbVwLXFvO/kaKG7jNzHbX75lFRLwFeAi4VdJ7gL8hSRQNQDnVUKOW2yzMzHY3YJtFRNwC/BkwGfgx8EREfC0iRu461f2Qq6HMzHY3UJvF2ZLuBn4FPEbSQP3nkr4v6bB9FWA1NKSXzrqB28wsMVCbxRdILmGtJ7nnYTHwcUlHAP9EkjzGpF1tFq6GMjMDBk4W20gSQj2woW9iRDzFGE4UALW5DBK0uxrKzAwYuM3iz0kas7tJroIaNyR5aFUzswKDDav6f/ZhLPuVhnzW1VBmZqlK9g01qtXns74aysws5WTRjykNedZvb692GGZm+wUni368bNYkHl27zQMgmZnhZNGvhXMms6Ojm1UbW6odiplZ1TlZ9OPEuZMBePC5rVWOxMys+pws+nHo9Ak01eZ4aI2ThZmZk0U/Mhlx/JxJThZmZjhZDGjhnMn88cUdtPt+CzMb55wsBrBwzhR6eoPHnt9W7VDMzKrKyWIAJ8yZBOCqKDMb95wsBnBAUx2zJtfzoJOFmY1zThaDWDhnMg/58lkzG+cqmiwkLZH0hKSVki4pMf9USQ9I6pZ0bsH0hZLukbRc0iOS3lHJOAeycM5knt+6k+YdHdUKwcys6iqWLCRlgSuBM4EFwPmSFhQt9hxwIXB90fQ24L0RcSywBLhC0uRKxTqQhenNeQ+7KsrMxrFKnlksBlZGxKqI6ARuAM4pXCAiVkfEI0Bv0fQn00GWiIh1JIMvzahgrP06buYkshm5kdvMxrVKJotZwJqC52vTaUMiaTGQB54eobiGpD6f5agDm5wszGxcq2SyUIlpQ+rCVdLBwH8B74uI3hLzL5K0TNKy5ubmYYY5uIVzJ/Pw2q3ugdbMxq1KJou1wJyC57OBdeWuLGki8N/AZyLiD6WWiYirI2JRRCyaMaNytVQL50xmR3s3qza2VmwfZmb7s0omi6XAEZIOkZQHzgNuLmfFdPmbgOsi4gcVjLEsC+ckjdyuijKz8apiySIiuoGLgduBx4EbI2K5pMsknQ0g6SRJa4G3AVdJWp6u/nbgVOBCSQ+lfwsrFetgDpsxgQm1OR5as6VaIZiZVVWukhuPiNuA24qmfbbg8VKS6qni9b4LfLeSsQ1FNiOOnz2Jh9e4jygzG598B3eZFs6ZzOMvbHcPtGY2LjlZlOmEOZPp7g2Wr/PZhZmNP04WZTpxjodZNbPxy8miTAdMrGPmpDpfEWVm45KTxRAsnDuZ+5/dQo9vzjOzccbJYgje9LKZvLCtnduXv1jtUMzM9ikniyFYctxBzJ/WwNfvepoIn12Y2fjhZDEE2Yz40GsP49Hnt3H3yo3VDsfMbJ9xshiit758Fgc01fL1u6rSCa6ZWVU4WQxRbS7LX77mEP7n6U2+MsrMxg0ni2F458nzmFiX4+t3rax2KGZm+4STxTBMqM1xwZ/M5/bl61m5YUe1wzEzqzgni2G68E/mU1eT4Ru/WVXtUMzMKs7JYpimTajlvJPm8pMHn+f5rTurHY6ZWUU5WeyFv3zNIQBc8zufXZjZ2OZksRdmT2ng7IUzueG+NT67MLMxzcliL33s9CPIZsRF1y2jrbO72uGYmVWEk8Vemj+9kf9z/omseGE7f/eDR9wNiJmNSU4WI+B1Rx/AJUuO5r8ffYGv3el7L8xs7KlospC0RNITklZKuqTE/FMlPSCpW9K5RfMukPRU+ndBJeMcCRedeihvPXEWX/3lk/zs0ReqHY6Z2YiqWLKQlAWuBM4EFgDnS1pQtNhzwIXA9UXrTgU+B5wMLAY+J2lKpWIdCZL457e+jBPnTubjNz7MinXbqx2SmdmIqeSZxWJgZUSsiohO4AbgnMIFImJ1RDwC9Bat+2fAHRGxOSK2AHcASyoY64ioq8ly1btfwaT6Gj543TI2bG+vdkhmZiOiksliFrCm4PnadFql162qAybW8c33LmJzayfnXPl7HlnrzgbNbPSrZLJQiWnlXipU1rqSLpK0TNKy5ubmIQVXSS+bPYkf/tWryEic+417+PEDa6sdkpnZXqlkslgLzCl4PhtYN5LrRsTVEbEoIhbNmDFj2IFWwrEzJ3HLR0/hFXOn8PEbH+bzt66gu6e4ts3MbHSoZLJYChwh6RBJeeA84OYy170deIOkKWnD9hvSaaPK1MY8131gMe979Xz+8+5nuODb97GltbPaYZmZDVnFkkVEdAMXk3zJPw7cGBHLJV0m6WwASSdJWgu8DbhK0vJ03c3A50kSzlLgsnTaqFOTzfC5s47l8nOPZ+nqLbzpa79j6epRWRQzG8c0Vu44XrRoUSxbtqzaYQzo0bXbuPj7D7Bmcxt/e8aRfOR1h5PNlGqeMTPbNyTdHxGLBlvOd3DvQy+bPYlbP3oKZ50wk6/c8STvvuZe1vvyWjMbBZws9rGmuhqueMdCLj/3eB5as5Uz//133Pn4+mqHZWY2ICeLKpDE2xbN4ZaPnsIBTbV84DvL+MvvLOPZTa3VDs3MrCQniyo6/IAJ/PTiV/P3S47mnqc38vp/+y1f/vkfaelwV+dmtn9xsqiy2lyWvzrtMH71ydN48wkH8/W7nub0f72LG5etYWdnT7XDMzMDfDXUfueB57bwj7es4OE1W6mvyXLaUTNYctxBvO7oA5hYV1Pt8MxsjCn3aigni/1Qb29wz6pN/PyxF7l9+Yts2NFBPpvh1YdP4/ULDuJPjzmAAyfWVTtMMxsDnCzGiN7e4ME1W/j5Yy/y8+UvsmZzMtb3CbMnccYxB3LGggM5+qAmJN+vYWZD52QxBkUET65v4ZePr+eOFet5eO1WIuDQ6Y2cu2g2f/Hy2T7jMLMhcbIYBzbsaOfOxzdw04PPc98zm8kIXnvkDN6+aA5/eswh3OkiAAARCElEQVSB5HO+fsHMBuZkMc48s7GVH96/hh/d/zwvbm+nqTbHyYdO4zVHTOfVh0/nsBmNrqoysz04WYxTPb3B755q5hcr1vP7lRt5dlMbAAdNrONVh03jyAObOHRGI4fNmMC8aQ3UZH32YTaelZsscvsiGNt3shlx2lEHcNpRBwCwZnMbd6/cyN0rN/L7lRu56cHnd1t27tQG5k5tYPaUemZPSf7PmdrAvKkNTGnMV6sYZraf8ZnFOLO9vYtnmlt5urmFVc2trNrYwprNO1m7pY0tbV27LTuloYZDpjdy6IwJHDK9keNnT+KVh07z2YjZGOIzCytpYl0NJ8yZzAlzJu8xr6Wjm+e37GTN5jZWb2pl1cZWVjW38Nsnm/nh/cnQsFMb85x53EGcdcJMFs+fSsZdrJuNC04WtsuE2hxHHdTEUQc17TFvR3sXf1i1mVseXsePH3ie7937HAdOrOWMYw5kSkOeupoMdTVZamuy1OUyzGiqZebkeg6aVEdTbc6N62ajnJOFlaWprobXLziQ1y84kLbObu58fAO3PLyOnz60jrbObnoHqM1szGc5aFIdUxvzTKjNMaGuhgm1OZrqckxpyDNvWtJuMm9aA03u0sRsv+RkYUPWkM9x1gkzOeuEmUBys2BXT9De3UNHVy87O3vYsKOdF7a18+K25P8L23ayta2LjS2drN7Uxo72blo6umjv6t1t29Ma88yeUs/E+hoa8zkm1OWSBFObPG6qy9FUV0NTXY6JdTkmN+SZ3ljLxHqfvZhVkpOF7TVJ5HNKbgJMbyCfO62hrHV3tHfx7KY2ntvcxrOb2nh2UyvPb91JS0c3L25rp7Wjmx0d3bR0dDPQtRg1WTG1Mc+0xlom1OXICISQQIJcJsO0xjzTm2qZPiHP9Am1TJtQS1OajBprc0zI52iszZJzA77ZHpwsrKqa6mo4btYkjps1acDlIoK2zh52tHezo72L7en/rW1dbGrtZFNLB5taOtnU2sGO9rRaLIIg6A3o6ulm5YYWmls66OzuHXBfuUyS+PK5DDXZDPlshrqazK6k0libozGfZUJdjsn1eSY31DCpvobJDXma6nJ0dPfS2tFNS3uS6No6upk/vZHTjprhajYbtSqaLCQtAf4dyALXRMSXiubXAtcBrwA2Ae+IiNWSaoBrgJenMV4XEV+sZKy2f5O064v6oEnD7/8qImjp6GZjS5JgdnR005r+7WjvprWjh47uHjq7e+ns6aWrp5eO7l7au3po7eihtaObza1ttHYmy2/f2TVge02hmqx45aHTeMOCpAPIgyfVD7scZvtaxe6zkJQFngReD6wFlgLnR8SKgmU+AhwfER+WdB7w5xHxDknvBM6OiPMkNQArgNMiYnV/+/N9FlYNvb3Bjo4kaWxt62J7exd1NZmkWiv9q6vJ8ujz27hjRdIB5DMbk+FzD5neyMzJdRw0sT75P6mOqQ152jp7aOlIzpz6klkuk5zp5LOZXWc9Arp7g57eSP/30lib46gDmzjywCZmTa7f49Lmlo5untvUxovbd9KYzzFtQlItN7GuZrdlI4L2rl7aOrvJSEyqr/Fl0mPU/nCfxWJgZUSsSgO6ATiH5Iu/zznApenjHwL/oaSVMoBGSTmgHugEtlcwVrNhyWSSL9JJ9TXMmdr/cifNn8pJ86fy6TOP5unmFn6xYj2PPb+NF7a18z9Pb2T99vaSZyj5XIbGfJae3qAzPcvp7/ddLiO6CzbSmM9yxIFNHDypjnXb2lmzuY3NrZ39rjulMY+Ats4eWjt3byPKZdI2oYLk0t3bS3dPEld3T9ATQWM+y8T09ZiYXojQE0FLe9Lu1JJWzwFMm5BnamOeqY21TGvMM7E+R08vdPf00tUbdPf00tMb1OezNOZzNOSzNORzNNRmyWczZDIiK5HJQFaiIZ+jPp8d7C0jInwxxDBUMlnMAtYUPF8LnNzfMhHRLWkbMI0kcZwDvAA0AH8bEZuLdyDpIuAigLlz5450/GYjThKHH9DE4Qfsfi9Ld08vzS0dbGntorE2u+vqr9rc7l9+EclZRGd3L1LSZUsuk0ka9CW2t3fx1PoWnly/gyde3MGT65O/mZPrWXLcQcyd2sCcKQ0cPLmOnZ09bCxo69nU0omUXO3W98XcWJuluyd2zd/Y0snGlg7Wbd1JLpOhJpfsP5/NIMHGlk5WbWxl286u3arochntdmVbBDzw3Fa2tHXSU249XhnqajJMbcgzpTFJRPU1WXa0d7NtZ9euv5aObibU5phUX8OUxhqmNOSZVF9DLiN6Ijlb7OlNkl9tes/QjKZaZkxI/jfV1dDWmZzxtaRVk22dPdRkRV1NlvqaLHU1WepqMi+djRUUsbYmw/QJtUxtzDOlIU+24Iyttzdo6exmW1sXrZ3dNOZzTKyvoak2V/Uzu0omi1IlKz4q+ltmMdADzASmAL+T9Mu+s5RdC0ZcDVwNSTXUXkdsViW5bIaDJ9UP2o4hiZqs+u1yZWJdDa+YN4VXzJtSiTCHJCJo7ewhlxG1uUzJX/O9vcH29uQihR3t3eQyIptJypjLZMhmlLQXdfbQ1tFNa2fy5dzV00tvBD290BNBb2/Q2tnN1rYuNrd2sqW1k81tnWzY3sHE+hwzJ9dx9MFNTKpP7vFp7ehha1snW3d2saWtk7VbdtLTG2QzIpMm4YySfW9s6aSlo7sir5EEUxvyNNRmB2wDy4hdZ2xZqaD6MTn7WjBzEte9f3FFYuxTyWSxFphT8Hw2sK6fZdamVU6TgM3AO4GfR0QXsEHS74FFwCrMbFSQxITagb9iMhkxuSHP5Ib9u9PKts5uNu7opLmlne07u9OLLbK7rpBryGfp6k7uNWrv6qG9K7kooregLq8vWbZ1drO5tTM5o2vpYGNrJ60d3Uysq9lVpTmpvobG2hytnUkC2Za2iW3b2UVvBDXZTHpWqV0dglZaJZPFUuAISYcAzwPnkSSBQjcDFwD3AOcCv4qIkPQccLqk75JUQ70SuKKCsZqZ9ashn2PutNzA9w/lYRJj99Loit19FBHdwMXA7cDjwI0RsVzSZZLOThf7T2CapJXAx4FL0ulXAhOAx0iSzrcj4pFKxWpmZgNzF+VmZuNYuZfOul8DMzMblJOFmZkNysnCzMwG5WRhZmaDcrIwM7NBOVmYmdmgxsyls5KagWfLWHQ6sLHC4exrY61MY608MPbKNNbKA2OvTOWWZ15EzBhsoTGTLMolaVk51xSPJmOtTGOtPDD2yjTWygNjr0wjXR5XQ5mZ2aCcLMzMbFDjMVlcXe0AKmCslWmslQfGXpnGWnlg7JVpRMsz7toszMxs6MbjmYWZmQ2Rk4WZmQ1qXCULSUskPSFppaRLBl9j/yPpW5I2SHqsYNpUSXdIeir9X/0xNcskaY6kX0t6XNJySX+dTh+VZZJUJ+k+SQ+n5fnHdPohku5Ny/P/JO3fQ8OVICkr6UFJt6bPR22ZJK2W9KikhyQtS6eNymOuj6TJkn4o6Y/p5+lVI1mmcZMsJGVJBlU6E1gAnC9pQXWjGpZrgSVF0y4B7oyII4A7eWkQqdGgG/hERBxDMiLi/0rfl9Fapg7g9Ig4AVgILJH0SuDLwFfT8mwBPlDFGIfrr0kGMusz2sv0uohYWHAvwmg95vr8O8lw1EcDJ5C8VyNXpogYF3/Aq4DbC55/Gvh0teMaZlnmA48VPH8CODh9fDDwRLVj3Iuy/RR4/VgoE8mQwA8AJ5PcSZtLp+92LI6GP2B2+mVzOnAroNFcJmA1ML1o2qg95oCJwDOkFy1Vokzj5swCmAWsKXi+Np02FhwYES8ApP8PqHI8wyJpPnAicC+juExpdc1DwAbgDuBpYGskQw3D6Dz2rgA+BfSmz6cxussUwC8k3S/ponTaqD3mgEOBZuDbaVXhNZIaGcEyjadkoRLTfN3wfkLSBOBHwN9ExPZqx7M3IqInIhaS/BpfDBxTarF9G9XwSXozsCEi7i+cXGLRUVMm4NUR8XKSaun/JenUage0l3LAy4GvR8SJQCsjXI02npLFWmBOwfPZwLoqxTLS1ks6GCD9v6HK8QyJpBqSRPG9iPhxOnlUlwkgIrYCd5G0xUyWlEtnjbZj79XA2ZJWAzeQVEVdwSguU0SsS/9vAG4iSeqj+ZhbC6yNiHvT5z8kSR4jVqbxlCyWAkekV3DkgfOAm6sc00i5GbggfXwBSb3/qCBJwH8Cj0fEvxXMGpVlkjRD0uT0cT1wBklD46+Bc9PFRk15ACLi0xExOyLmk3xufhUR72KUlklSo6SmvsfAG4DHGKXHHEBEvAiskXRUOulPgRWMZJmq3TCzjxuB3gg8SVKH/A/VjmeYZfg+8ALQRfJr4gMk9cd3Ak+l/6dWO84hlOcUkuqLR4CH0r83jtYyAccDD6bleQz4bDr9UOA+YCXwA6C22rEOs3ynAbeO5jKlcT+c/i3v+y4YrcdcQbkWAsvSY+8nwJSRLJO7+zAzs0GNp2ooMzMbJicLMzMblJOFmZkNysnCzMwG5WRhZmaDcrKw/ZakkPSVgueflHTpCG37WknnDr7kXu/nbWkPoL8umn5aX++tJda5re9ejWHsrybtwmJ+Yc/EZnvLycL2Zx3AWyVNr3YghdIejMv1AeAjEfG6cleIiDdGcvf3cJwC/M8w1x1UwR3bNs44Wdj+rJtkHOG/LZ5RfGYgqSX9f5qk30i6UdKTkr4k6V3pGBOPSjqsYDNnSPpdutyb0/Wzki6XtFTSI5I+VLDdX0u6Hni0RDznp9t/TNKX02mfJfny/oaky0uUb6KkmyStkPQNSZl0vdWSpqdnB49L+qaSsTF+kd4VjqSPpes9IumGgm0uAX6WPs72s+5CSX9I172pb4wDSXdJWpQ+np5274GkCyX9QNItwC8GesNs7HKysP3dlcC7JE0awjonkIy98DLgPcCREbEYuAb4aMFy84HXAm8i+UKvIzkT2BYRJwEnAR+UdEi6/GKSu313GwdF0kySsR1OJ7mL9iRJb4mIy0juqH1XRPxdiTgXA59I4zwMeGuJZY4AroyIY4GtwF+k0y8BToyI44EPFyz/OpL+qAZa9zrg79N1HwU+V2K/xV4FXBARp5exrI1BTha2X4ukB9rrgI8NYbWlEfFCRHSQdO3S92v4UZIE0efGiOiNiKeAVcDRJP0EvTftYvxeku4SjkiXvy8inimxv5OAuyKiOZIuu78HlNOL6X0RsSoieki6cTmlxDLPRMRD6eP7C+J/BPiepHeTnIH1Ja3NEdHW37pp0p0cEb9Jp3+nzFjviIjNZSxnY5SThY0GV5D84m8smNZNevymnREWDunZUfC4t+B5L0lXzn2K+7oJkq63PxrJCGoLI+KQiOhLNq39xFequ+5ylNp/scKy9PBS/G8iOet6BXB/2pZwJnB7Gev2Z9drCtQVzeuv7DZOOFnYfi/9RXsjuw/buZrkixLgHKBmGJt+m6RM2o5xKMmoYrcDf5V2m46kI9OeSQdyL/DatJ4/C5wP/GaQdQAWp70gZ4B3AHeXE3S6/JyI+DXJgESTgQns3l5RUkRsA7ZIek066T0Fsa7mpde04leK2ejiKxtstPgKcHHB828CP5V0H0lvmsP55fsEyRflgcCHI6Jd0jUkVT0PpGcszcBbBtpIRLwg6dMkXXYLuC0iyukK+h7gSyRtFr8lGVehHFngu2mVkoCvAjuAIyLij2WsfwFJG00DSfXb+9Lp/wrcKOk9wK/KjMXGCfc6azYGSDoFeHdEfHjQhc2GwcnCzMwG5TYLMzMblJOFmZkNysnCzMwG5WRhZmaDcrIwM7NBOVmYmdmg/j8C3yVlzYkGywAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a2c00c208>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#Plot the number of \"hits\" based on the binning. That is, the number of measurements falling into the same bin in function of the number of bins\n",
    "bins_num=range(1,60)\n",
    "merged_df_short_binned=merged_df_short.copy()\n",
    "hits_vec=[]\n",
    "for bin_k in bins_num:\n",
    "    time_stamp_str=\"TIME_STAMP_Bin_\"+str(bin_k)\n",
    "    merged_df_short_binned[time_stamp_str]=round(merged_df_short_binned[\"TIME_STAMP\"].dt.total_seconds()*bin_k/(100*36)).astype(int)\n",
    "    hits_prop=merged_df_short_binned.duplicated(subset=[\"HADM_ID\",\"LABEL_CODE\",time_stamp_str]).sum()/len(merged_df_short_binned.index)\n",
    "    hits_vec+=[hits_prop]\n",
    "plt.plot(bins_num,hits_vec)\n",
    "plt.title(\"Percentage of hits in function of the binning factor\")\n",
    "plt.xlabel(\"Number of bins/hour\")\n",
    "plt.ylabel(\"% of hits\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#We choose 60 bins per hour. We now need to aggregate the data in different ways.\n",
    "bin_k=60\n",
    "merged_df_short[\"TIME\"]=round(merged_df_short_binned[\"TIME_STAMP\"].dt.total_seconds()*bin_k/(100*36)).astype(int)\n",
    "\n",
    "#For lab, we have to average the duplicates.\n",
    "lab_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Lab\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "lab_subset[\"KEY_ID\"]=lab_subset[\"HADM_ID\"].astype(str)+\"/\"+lab_subset[\"TIME\"].astype(str)+\"/\"+lab_subset[\"LABEL_CODE\"].astype(str)\n",
    "lab_subset[\"VALUENUM\"]=lab_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "lab_subset_s=lab_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].mean().to_frame().reset_index()\n",
    "\n",
    "lab_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "lab_s=pd.merge(lab_subset,lab_subset_s,on=\"KEY_ID\")\n",
    "assert(not lab_s.isnull().values.any())\n",
    "\n",
    "#For inputs, we have to sum the duplicates.\n",
    "input_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Inputs\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "input_subset[\"KEY_ID\"]=input_subset[\"HADM_ID\"].astype(str)+\"/\"+input_subset[\"TIME\"].astype(str)+\"/\"+input_subset[\"LABEL_CODE\"].astype(str)\n",
    "input_subset[\"VALUENUM\"]=input_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "input_subset_s=input_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].sum().to_frame().reset_index()\n",
    "\n",
    "input_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "input_s=pd.merge(input_subset,input_subset_s,on=\"KEY_ID\")\n",
    "assert(not input_s.isnull().values.any())\n",
    "\n",
    "#For outpus, we have to sum the duplicates as well.\n",
    "output_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Outputs\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "output_subset[\"KEY_ID\"]=output_subset[\"HADM_ID\"].astype(str)+\"/\"+output_subset[\"TIME\"].astype(str)+\"/\"+output_subset[\"LABEL_CODE\"].astype(str)\n",
    "output_subset[\"VALUENUM\"]=output_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "output_subset_s=output_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].sum().to_frame().reset_index()\n",
    "\n",
    "output_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "output_s=pd.merge(output_subset,output_subset_s,on=\"KEY_ID\")\n",
    "assert(not output_s.isnull().values.any())\n",
    "\n",
    "#For prescriptions, we have to sum the duplicates as well.\n",
    "presc_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Prescriptions\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "presc_subset[\"KEY_ID\"]=presc_subset[\"HADM_ID\"].astype(str)+\"/\"+presc_subset[\"TIME\"].astype(str)+\"/\"+presc_subset[\"LABEL_CODE\"].astype(str)\n",
    "presc_subset[\"VALUENUM\"]=presc_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "presc_subset_s=presc_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].sum().to_frame().reset_index()\n",
    "\n",
    "presc_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "presc_s=pd.merge(presc_subset,presc_subset_s,on=\"KEY_ID\")\n",
    "assert(not presc_s.isnull().values.any())\n",
    "\n",
    "#Now remove the duplicates/\n",
    "lab_s=(lab_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "input_s=(input_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "output_s=(output_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "presc_s=(presc_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "\n",
    "#We append both subsets together to form the complete dataframe\n",
    "complete_df1=lab_s.append(input_s)\n",
    "complete_df2=complete_df1.append(output_s)\n",
    "complete_df=complete_df2.append(presc_s)\n",
    "\n",
    "\n",
    "assert(sum(complete_df.duplicated(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"])==True)==0) #Check if no duplicates anymore.\n",
    "\n",
    "# We remove patients with less than 50 observations.\n",
    "id_counts=complete_df.groupby(\"HADM_ID\").count()\n",
    "id_list=list(id_counts.loc[id_counts[\"TIME\"]<50].index)\n",
    "complete_df=complete_df.drop(complete_df.loc[complete_df[\"HADM_ID\"].isin(id_list)].index).copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "#We also choose 10 bins per hour. We now need to aggregate the data in different ways.\n",
    "bin_k=10\n",
    "merged_df_short[\"TIME\"]=round(merged_df_short_binned[\"TIME_STAMP\"].dt.total_seconds()*bin_k/(100*36))\n",
    "\n",
    "#For lab, we have to average the duplicates.\n",
    "lab_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Lab\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "lab_subset[\"KEY_ID\"]=lab_subset[\"HADM_ID\"].astype(str)+\"/\"+lab_subset[\"TIME\"].astype(str)+\"/\"+lab_subset[\"LABEL_CODE\"].astype(str)\n",
    "lab_subset[\"VALUENUM\"]=lab_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "lab_subset_s=lab_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].mean().to_frame().reset_index()\n",
    "\n",
    "lab_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "lab_s=pd.merge(lab_subset,lab_subset_s,on=\"KEY_ID\")\n",
    "assert(not lab_s.isnull().values.any())\n",
    "\n",
    "#For inputs, we have to sum the duplicates.\n",
    "input_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Inputs\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "input_subset[\"KEY_ID\"]=input_subset[\"HADM_ID\"].astype(str)+\"/\"+input_subset[\"TIME\"].astype(str)+\"/\"+input_subset[\"LABEL_CODE\"].astype(str)\n",
    "input_subset[\"VALUENUM\"]=input_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "input_subset_s=input_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].sum().to_frame().reset_index()\n",
    "\n",
    "input_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "input_s=pd.merge(input_subset,input_subset_s,on=\"KEY_ID\")\n",
    "assert(not input_s.isnull().values.any())\n",
    "\n",
    "#For outpus, we have to sum the duplicates as well.\n",
    "output_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Outputs\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "output_subset[\"KEY_ID\"]=output_subset[\"HADM_ID\"].astype(str)+\"/\"+output_subset[\"TIME\"].astype(str)+\"/\"+output_subset[\"LABEL_CODE\"].astype(str)\n",
    "output_subset[\"VALUENUM\"]=output_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "output_subset_s=output_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].sum().to_frame().reset_index()\n",
    "\n",
    "output_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "output_s=pd.merge(output_subset,output_subset_s,on=\"KEY_ID\")\n",
    "assert(not output_s.isnull().values.any())\n",
    "\n",
    "#For prescriptions, we have to sum the duplicates as well.\n",
    "presc_subset=merged_df_short.loc[merged_df_short[\"Origin\"]==\"Prescriptions\",[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]]\n",
    "presc_subset[\"KEY_ID\"]=presc_subset[\"HADM_ID\"].astype(str)+\"/\"+presc_subset[\"TIME\"].astype(str)+\"/\"+presc_subset[\"LABEL_CODE\"].astype(str)\n",
    "presc_subset[\"VALUENUM\"]=presc_subset[\"VALUENUM\"].astype(float)\n",
    "\n",
    "presc_subset_s=presc_subset.groupby(\"KEY_ID\")[\"VALUENUM\"].sum().to_frame().reset_index()\n",
    "\n",
    "presc_subset.rename(inplace=True,columns={\"VALUENUM\":\"ExVALUENUM\"})\n",
    "presc_s=pd.merge(presc_subset,presc_subset_s,on=\"KEY_ID\")\n",
    "assert(not presc_s.isnull().values.any())\n",
    "\n",
    "#Now remove the duplicates/\n",
    "lab_s=(lab_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "input_s=(input_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "output_s=(output_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "presc_s=(presc_s.drop_duplicates(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"]))[[\"HADM_ID\",\"TIME\",\"LABEL_CODE\",\"VALUENUM\"]].copy()\n",
    "\n",
    "#We append both subsets together to form the complete dataframe\n",
    "complete_df1=lab_s.append(input_s)\n",
    "complete_df2=complete_df1.append(output_s)\n",
    "complete_df10=complete_df2.append(presc_s)\n",
    "\n",
    "\n",
    "assert(sum(complete_df10.duplicated(subset=[\"HADM_ID\",\"LABEL_CODE\",\"TIME\"])==True)==0) #Check if no duplicates anymore.\n",
    "\n",
    "# We remove patients with less than 50 observations.\n",
    "id_counts=complete_df10.groupby(\"HADM_ID\").count()\n",
    "id_list=list(id_counts.loc[id_counts[\"TIME\"]<50].index)\n",
    "complete_df10=complete_df10.drop(complete_df10.loc[complete_df10[\"HADM_ID\"].isin(id_list)].index).copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>icustay_id</th>\n",
       "      <th>sapsii</th>\n",
       "      <th>sapsii_prob</th>\n",
       "      <th>age_score</th>\n",
       "      <th>hr_score</th>\n",
       "      <th>sysbp_score</th>\n",
       "      <th>temp_score</th>\n",
       "      <th>pao2fio2_score</th>\n",
       "      <th>...</th>\n",
       "      <th>bun_score</th>\n",
       "      <th>wbc_score</th>\n",
       "      <th>potassium_score</th>\n",
       "      <th>sodium_score</th>\n",
       "      <th>bicarbonate_score</th>\n",
       "      <th>bilirubin_score</th>\n",
       "      <th>gcs_score</th>\n",
       "      <th>comorbidity_score</th>\n",
       "      <th>admissiontype_score</th>\n",
       "      <th>HADM_ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>27513</td>\n",
       "      <td>163557</td>\n",
       "      <td>200003</td>\n",
       "      <td>30</td>\n",
       "      <td>0.106398</td>\n",
       "      <td>7</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>163557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20707</td>\n",
       "      <td>129310</td>\n",
       "      <td>200007</td>\n",
       "      <td>18</td>\n",
       "      <td>0.029295</td>\n",
       "      <td>7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>129310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9514</td>\n",
       "      <td>127229</td>\n",
       "      <td>200014</td>\n",
       "      <td>43</td>\n",
       "      <td>0.305597</td>\n",
       "      <td>18</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>127229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>76603</td>\n",
       "      <td>179633</td>\n",
       "      <td>200024</td>\n",
       "      <td>52</td>\n",
       "      <td>0.507019</td>\n",
       "      <td>18</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>179633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>19167</td>\n",
       "      <td>164161</td>\n",
       "      <td>200025</td>\n",
       "      <td>29</td>\n",
       "      <td>0.096698</td>\n",
       "      <td>7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>164161</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    subject_id  hadm_id  icustay_id  sapsii  sapsii_prob  age_score  hr_score  \\\n",
       "1        27513   163557      200003      30     0.106398          7       4.0   \n",
       "3        20707   129310      200007      18     0.029295          7       0.0   \n",
       "8         9514   127229      200014      43     0.305597         18       2.0   \n",
       "16       76603   179633      200024      52     0.507019         18       4.0   \n",
       "17       19167   164161      200025      29     0.096698          7       0.0   \n",
       "\n",
       "    sysbp_score  temp_score  pao2fio2_score   ...     bun_score  wbc_score  \\\n",
       "1           5.0         0.0             NaN   ...           0.0        3.0   \n",
       "3           5.0         0.0             NaN   ...           0.0        0.0   \n",
       "8           5.0         0.0             6.0   ...           0.0        0.0   \n",
       "16          5.0         0.0             6.0   ...           6.0        0.0   \n",
       "17          5.0         0.0             6.0   ...           0.0        0.0   \n",
       "\n",
       "    potassium_score  sodium_score  bicarbonate_score  bilirubin_score  \\\n",
       "1               0.0           0.0                3.0              0.0   \n",
       "3               0.0           0.0                0.0              NaN   \n",
       "8               0.0           0.0                0.0              0.0   \n",
       "16              3.0           1.0                3.0              NaN   \n",
       "17              3.0           0.0                0.0              NaN   \n",
       "\n",
       "    gcs_score  comorbidity_score  admissiontype_score  HADM_ID  \n",
       "1         0.0                  0                    8   163557  \n",
       "3         0.0                  0                    6   129310  \n",
       "8         0.0                  0                    8   127229  \n",
       "16        0.0                  0                    6   179633  \n",
       "17        0.0                  0                    8   164161  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#SAPSII data\n",
    "saps=pd.read_csv(file_path+'saps2.csv')\n",
    "valid_hadm_id=complete_df[\"HADM_ID\"].unique()\n",
    "saps=saps.loc[saps[\"hadm_id\"].isin(list(valid_hadm_id))].copy()\n",
    "saps[\"HADM_ID\"]=saps[\"hadm_id\"]\n",
    "saps.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "saps[\"SUM_score\"]=saps[[ 'hr_score', 'sysbp_score', 'temp_score', 'pao2fio2_score','uo_score', 'bun_score', 'wbc_score', 'potassium_score', 'sodium_score','bicarbonate_score', 'bilirubin_score', 'gcs_score']].sum(axis=1)\n",
    "saps[\"X\"]=-7.7631 + 0.0737 * saps[\"SUM_score\"] + 0.9971 * (np.log(saps[\"SUM_score\"] + 1))\n",
    "saps[\"PROB\"]=np.exp(saps[\"X\"])/(1+np.exp(saps[\"X\"]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject_id</th>\n",
       "      <th>hadm_id</th>\n",
       "      <th>icustay_id</th>\n",
       "      <th>sapsii</th>\n",
       "      <th>sapsii_prob</th>\n",
       "      <th>age_score</th>\n",
       "      <th>hr_score</th>\n",
       "      <th>sysbp_score</th>\n",
       "      <th>temp_score</th>\n",
       "      <th>pao2fio2_score</th>\n",
       "      <th>...</th>\n",
       "      <th>sodium_score</th>\n",
       "      <th>bicarbonate_score</th>\n",
       "      <th>bilirubin_score</th>\n",
       "      <th>gcs_score</th>\n",
       "      <th>comorbidity_score</th>\n",
       "      <th>admissiontype_score</th>\n",
       "      <th>HADM_ID</th>\n",
       "      <th>SUM_score</th>\n",
       "      <th>X</th>\n",
       "      <th>PROB</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>27513</td>\n",
       "      <td>163557</td>\n",
       "      <td>200003</td>\n",
       "      <td>30</td>\n",
       "      <td>0.106398</td>\n",
       "      <td>7</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>163557</td>\n",
       "      <td>15.0</td>\n",
       "      <td>-3.893052</td>\n",
       "      <td>0.019976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20707</td>\n",
       "      <td>129310</td>\n",
       "      <td>200007</td>\n",
       "      <td>18</td>\n",
       "      <td>0.029295</td>\n",
       "      <td>7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>129310</td>\n",
       "      <td>5.0</td>\n",
       "      <td>-5.608037</td>\n",
       "      <td>0.003655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9514</td>\n",
       "      <td>127229</td>\n",
       "      <td>200014</td>\n",
       "      <td>43</td>\n",
       "      <td>0.305597</td>\n",
       "      <td>18</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>127229</td>\n",
       "      <td>17.0</td>\n",
       "      <td>-3.628210</td>\n",
       "      <td>0.025876</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>76603</td>\n",
       "      <td>179633</td>\n",
       "      <td>200024</td>\n",
       "      <td>52</td>\n",
       "      <td>0.507019</td>\n",
       "      <td>18</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>179633</td>\n",
       "      <td>28.0</td>\n",
       "      <td>-2.341969</td>\n",
       "      <td>0.087706</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>19167</td>\n",
       "      <td>164161</td>\n",
       "      <td>200025</td>\n",
       "      <td>29</td>\n",
       "      <td>0.096698</td>\n",
       "      <td>7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>164161</td>\n",
       "      <td>14.0</td>\n",
       "      <td>-4.031103</td>\n",
       "      <td>0.017445</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    subject_id  hadm_id  icustay_id  sapsii  sapsii_prob  age_score  hr_score  \\\n",
       "1        27513   163557      200003      30     0.106398          7       4.0   \n",
       "3        20707   129310      200007      18     0.029295          7       0.0   \n",
       "8         9514   127229      200014      43     0.305597         18       2.0   \n",
       "16       76603   179633      200024      52     0.507019         18       4.0   \n",
       "17       19167   164161      200025      29     0.096698          7       0.0   \n",
       "\n",
       "    sysbp_score  temp_score  pao2fio2_score    ...     sodium_score  \\\n",
       "1           5.0         0.0             NaN    ...              0.0   \n",
       "3           5.0         0.0             NaN    ...              0.0   \n",
       "8           5.0         0.0             6.0    ...              0.0   \n",
       "16          5.0         0.0             6.0    ...              1.0   \n",
       "17          5.0         0.0             6.0    ...              0.0   \n",
       "\n",
       "    bicarbonate_score  bilirubin_score  gcs_score  comorbidity_score  \\\n",
       "1                 3.0              0.0        0.0                  0   \n",
       "3                 0.0              NaN        0.0                  0   \n",
       "8                 0.0              0.0        0.0                  0   \n",
       "16                3.0              NaN        0.0                  0   \n",
       "17                0.0              NaN        0.0                  0   \n",
       "\n",
       "    admissiontype_score  HADM_ID  SUM_score         X      PROB  \n",
       "1                     8   163557       15.0 -3.893052  0.019976  \n",
       "3                     6   129310        5.0 -5.608037  0.003655  \n",
       "8                     8   127229       17.0 -3.628210  0.025876  \n",
       "16                    6   179633       28.0 -2.341969  0.087706  \n",
       "17                    8   164161       14.0 -4.031103  0.017445  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "saps.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "saps.to_csv(file_path+\"sapsii_processed.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'death_tags_df' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-17-9057a1bfc33a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msaps_death\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdeath_tags_df\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0msaps\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"HADM_ID\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msaps_death\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"PROB\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0my_pred_full\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msaps_death\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"sapsii_prob\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msaps_death\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"DEATHTAG\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'death_tags_df' is not defined"
     ]
    }
   ],
   "source": [
    "saps_death=pd.merge(death_tags_df,saps,on=\"HADM_ID\")\n",
    "y_pred=np.array(saps_death[\"PROB\"])\n",
    "y_pred_full=np.array(saps_death[\"sapsii_prob\"])\n",
    "y=np.array(saps_death[\"DEATHTAG\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'y' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-18-fed3e42f3e7a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mroc_auc_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mroc_auc_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mroc_auc_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my_pred_full\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'y' is not defined"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import roc_auc_score\n",
    "print(roc_auc_score(y,y_pred))\n",
    "print(roc_auc_score(y,y_pred_full))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "complete_df10[\"TIME\"].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataframe creation for Tensor Decomposition\n",
    "\n",
    "Creation of a unique index for the admissions id."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Creation of a unique index\n",
    "unique_ids=np.arange(complete_df[\"HADM_ID\"].nunique())\n",
    "np.random.shuffle(unique_ids)\n",
    "d=dict(zip(complete_df[\"HADM_ID\"].unique(),unique_ids))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_id_df   = pd.read_csv(outfile_path+\"UNIQUE_ID_dict.csv\")\n",
    "d = dict(zip(unique_id_df[\"HADM_ID\"].values,unique_id_df[\"UNIQUE_ID\"].values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Unique_id_dict=pd.Series(complete_df[\"HADM_ID\"].unique()).reset_index().copy()\n",
    "#Unique_id_dict.columns=[\"index\",\"HADM_ID\"]\n",
    "#Unique_id_dict[\"UNIQUE_ID\"]=Unique_id_dict[\"HADM_ID\"].map(d)\n",
    "#Unique_id_dict.to_csv(outfile_path+\"UNIQUE_ID_dict.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "### Death tags data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "admissions=pd.read_csv(file_path+\"Admissions_processed.csv\")\n",
    "death_tags_s=admissions.groupby(\"HADM_ID\")[\"DEATHTAG\"].unique().astype(int).to_frame().reset_index()\n",
    "death_tags_df=death_tags_s.loc[death_tags_s[\"HADM_ID\"].isin(complete_df[\"HADM_ID\"])].copy()\n",
    "death_tags_df[\"UNIQUE_ID\"]=death_tags_df[\"HADM_ID\"].map(d)\n",
    "death_tags_df.sort_values(by=\"UNIQUE_ID\",inplace=True)\n",
    "death_tags_df.rename(columns={\"DEATHTAG\":\"Value\"},inplace=True)\n",
    "death_tags_df.to_csv(outfile_path+\"complete_death_tags.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Tensor Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "complete_df[\"UNIQUE_ID\"] = complete_df[\"HADM_ID\"].map(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "#ICD9 codes\n",
    "ICD_diag=pd.read_csv(file_path+\"DIAGNOSES_ICD.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "main_diag=ICD_diag.loc[(ICD_diag[\"SEQ_NUM\"]==1)]\n",
    "complete_tensor=pd.merge(complete_df,main_diag[[\"HADM_ID\",\"ICD9_CODE\"]],on=\"HADM_ID\")\n",
    "\n",
    "#Only select the first 3 digits of each ICD9 code.\n",
    "complete_tensor[\"ICD9_short\"]=complete_tensor[\"ICD9_CODE\"].astype(str).str[:3]\n",
    "#Check that all codes are 3 digits long.\n",
    "str_len=complete_tensor[\"ICD9_short\"].str.len()\n",
    "assert(str_len.loc[str_len!=3].count()==0)\n",
    "\n",
    "#Finer encoding (3 digits)\n",
    "hot_encodings=pd.get_dummies(complete_tensor[\"ICD9_short\"])\n",
    "complete_tensor[hot_encodings.columns]=hot_encodings\n",
    "complete_tensor_nocov=complete_tensor[[\"UNIQUE_ID\",\"LABEL_CODE\",\"TIME\"]+[\"VALUENUM\"]].copy()\n",
    "\n",
    "complete_tensor_nocov.rename(columns={\"TIME\":\"TIME_STAMP\"},inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Normalization of the data (N(0,1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Add a column with the mean and std of each different measurement type and then normalize them.\n",
    "d_mean=dict(complete_tensor_nocov.groupby(\"LABEL_CODE\")[\"VALUENUM\"].mean())\n",
    "complete_tensor_nocov[\"MEAN\"]=complete_tensor_nocov[\"LABEL_CODE\"].map(d_mean)\n",
    "d_std=dict(complete_tensor_nocov.groupby(\"LABEL_CODE\")[\"VALUENUM\"].std())\n",
    "complete_tensor_nocov[\"STD\"]=complete_tensor_nocov[\"LABEL_CODE\"].map(d_std)\n",
    "complete_tensor_nocov[\"VALUENORM\"]=(complete_tensor_nocov[\"VALUENUM\"]-complete_tensor_nocov[\"MEAN\"])/complete_tensor_nocov[\"STD\"]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train-Validation-Test split\n",
    "Random sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Split training_validation_test sets RANDOM DIVISION.\n",
    "\n",
    "df_train,df_test =train_test_split(complete_tensor_nocov,test_size=0.1) \n",
    "\n",
    "#Make sure that patients of the test set have instances in the training set. (same with labels but this should be nearly certain)\n",
    "assert(len(df_test.loc[~df_test[\"UNIQUE_ID\"].isin(df_train[\"UNIQUE_ID\"])].index)==0)\n",
    "assert(len(df_test.loc[~df_test[\"LABEL_CODE\"].isin(df_train[\"LABEL_CODE\"])].index)==0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "#First train_val fold\n",
    "df_train1,df_val1 =train_test_split(df_train,test_size=0.2) \n",
    "\n",
    "#Make sure that patients of the test set have instances in the training set. (same with labels but this should be nearly certain)\n",
    "assert(len(df_val1.loc[~df_val1[\"UNIQUE_ID\"].isin(df_train1[\"UNIQUE_ID\"])].index)==0)\n",
    "assert(len(df_val1.loc[~df_val1[\"LABEL_CODE\"].isin(df_train1[\"LABEL_CODE\"])].index)==0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Second train_val fold\n",
    "df_train2,df_val2 =train_test_split(df_train,test_size=0.2) \n",
    "\n",
    "#Make sure that patients of the test set have instances in the training set. (same with labels but this should be nearly certain)\n",
    "assert(len(df_val2.loc[~df_val2[\"UNIQUE_ID\"].isin(df_train2[\"UNIQUE_ID\"])].index)==0)\n",
    "assert(len(df_val2.loc[~df_val2[\"LABEL_CODE\"].isin(df_train2[\"LABEL_CODE\"])].index)==0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Third train_val fold\n",
    "df_train3,df_val3 =train_test_split(df_train,test_size=0.2) \n",
    "\n",
    "#Make sure that patients of the test set have instances in the training set. (same with labels but this should be nearly certain)\n",
    "assert(len(df_val3.loc[~df_val3[\"UNIQUE_ID\"].isin(df_train3[\"UNIQUE_ID\"])].index)==0)\n",
    "assert(len(df_val3.loc[~df_val3[\"LABEL_CODE\"].isin(df_train3[\"LABEL_CODE\"])].index)==0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Venn diagram of the training sets. Visualization of the number of common samples.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPIAAADwCAYAAAA6oDmvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJztnXd8XMW597+zvalXF8lNttwtGxtjDNhUQxITQjFJgACppMMNkDcJQdc373VII7mXJPemAZcELp2XBAi9Gffeu2T13qXt58z7x5GMbEtW27O7Wp3v57MfSUdnZ+bsnt+ZZ5555hkhpcTAwGB0Y4p1AwwMDEaOIWQDgwTAELKBQQJgCNnAIAEwhGxgkAAYQjYwSAAMIRsYJACGkA0MEgBDyAYGCYAhZAODBMAQsoFBAmAI2cAgATCEbGCQABhCNjBIAAwhGxgkAIaQDQwSAEPIBgYJwJgVshDifSFEixDCHuu2GAwfIcRJIYRPCNHZ/X2+KoTIi3W7os2YFLIQYjJwMSCBa2PaGINIsFpK6QHGAXXAIzFuT9QZk0IGvgBsBh4Hbo9tUwwihZTSDzwPzI51W6KNJdYNiBFfAB4GtgCbhRA5Usq6GLfJYIQIIVzAzWgP6THFmBOyEOIiYBLwrJSyUQhxAvg88OvYtsxgBPw/IUQY8AD1wKoYtyfqjEXT+nbgTSllY/ffT2GY16Od66SUqYAd+BbwgRAiN8ZtiipjSshCCCewBlghhKgVQtQC9wALhBALYts6g5EipVSklC8CCnBRrNsTTcaUkIHr0L7k2UBR92sWsB5t3GwwihEanwbSgEOxbk80EWNppwkhxOvAASnl9844vgb4T2CilDIco8aZgGTA0f2y9fppR3vo9nxZstcrBPi7X77ul/Z7H1+uWCt6ynee8dMOiF4vun9KIAgEznj5gQ5ZLEMR+gSGhRDiJJCD9oCWQBnwUynlk7FsV7QZU0KOC4SwAdlovUYykNL98vCxgEZMl4dAYw6t1ZPo2J2H+l4y7u1uUsvtuBQRuXoAL9AOtPV6Nchi2RnBOgwGwBCynggh0ASbjdZr5ACpka5GgmzKIVidT7glE3OXB6tqwdzXuWFQ662ESu0oG5IQWz3YAyZdhlhetOCMOjRPcoMslooO9RhgCDnyaD3uRLQprjw0szXihKwoNXkEa/KQzdnYlX6EOxBhUI87CG72oH6YjLXBijXSbe1GAarRTN8yWSy7dKpnTGIIORII4QamoIl3HDo5ETuSCVbnE6qbiKU9BRumiJrIANRZCO7wEPowCfNBJ3YZWTO8N41AOVAqi2WTTnWMGQwhDxfNOTUZKETrgXW54X1OwsfnEKidgDXgwqZHHf3hFSh73QSeT8d8xImei0uagCPAMVksAzrWk7AYQh4qQqSjiXc6OpnNAK3pBI7MQ2nIxalHzztUSu0EnktH/SgJh469tAKcBA7LYlmlUx0JiSHkwaItjVsAjNezmuo8fMfnINrT9HtIjIQmM8GX0wm/lopDJydZD23AXuCo4SQbGEPI50LzOk8FFgLpelUTNqOWTcdfUogl2ubzcPELlLdT8D+Xgb3ZomvMvhfYBxyQxTGa4x8FGELuDyGmAeehw3RRDwE7yrG5+Cum4FCsw/M6xxoF5DYPvicysVTYdX0I+dF66P2GoM/GEPKZCJENXIg296sbpTPwHp6PfbQK+EwUkG+l4P1zNk6dTe5OYKsslsd1rGPUYQi5B20KaSlQoGc1bakEd12I2pkSn2PgkdJmJvSHbMLrk3HqXFUdsEkWy3qd6xkVGEIWwozmxCpCx/XZYQvqgUX4KqbgigcvtN4ccOL71TgsOgaY9HAM2CKLpVfneuKasS1kITKBS9HCKHWjahK+/edhDdnHViKHEKgvpeN7KjPi8d1nEgA+ksXyhI51xDVjU8haMMfC7pdu47kuN6Hdywi3ZOluZsY19RaC/5mLuset+3CiBE3Qfp3riTvGnpC1gI6VQKae1RyfhffIPBzSPObWfPfL+iS8v87FEdLXGeYD1stieVLHOuKOsSVkIWaheaR18xSHzag7LsLfMB7XSMtqD2D51mvcp0gsUmKensGOtSv5x3df546aDmbYzPgAvriQxy6bQuWLh5jx1718w22lCWB6OjuLV/Lq/nrSHt7Enb4wKQJkUS4f3r+cdwH+vJPz3i1ldUeQ3G+fz0+vmkYZwDMHmPXKUa5XVcwmE8qnC3n+xtkcGek1VVsJPDgRU51N97HzIWCDLJaqzvXEBWNDyJpDazkwU89qOpMIbr4U/O7IzKeqEpp92DNdBHwhzF97hfs+O5dn3jjBivnZ7P3SInb2Pv/FQ8x4/ThX/XE1v+19/EgjKRXtpFwxlfL6Luzf+ScPfOt8fn9RPjUbK8g1CeQfdnDr5+byfI+Q3ykhb0Iy7TMzafuwjPGPbOG7z63h+5G4Lr9A+eU4gluSdB9y1ANvjgVHWOKbfdq00mp0FnFVPr4Pr8YcKREDmARkuggA+MOYVYl5OB6jwkzarphKOUC2m0Cqg5qqDi3Q5cI8ai+YyFmpgC+fSsXMTNoALsqnWpFYO4ORcdY5JOYHqnF+qR69lzJmA9eLtYmfiC+xhSzEOOB6dA7uODwf767lOPtbzD8Sggri1hf58Z0v88tJKRy6ZjqlAG+VcN0tL/Lg999mTW+BNXqZesuL/Pgr/+A768sZd2Z5u2vJaPKRt3KSVs5geGwXi9KcVHhsRDSi6roW3P9Wgdeqoqf56wI+JdaKuTrWEXMS17TWQiwvRceHlSqQOy/EV5s/8vHwQFR34HzgXb5+RxFP57jpmp5BmzeE5UfvcGu6k4bilbxa14nDbEJmugg8uY+5/zjCzU/fyI97ymj0Yr/nDe69bDKv3bmQXb3Lv/NlvtfbtO5hfTnjfruVb95/If9x3nga9Li2ChuBH+ZhbtU3Zhu08M6NOtcRExKzRxZiNnAZOl5f0Iby0VUEoiFigPFJ+PJTOLqhnDmFmbSZBHhshFdMZmNVB1MAcjz4e0zxW+axX5WYT7biAfCFMP+ft7lrThZbzhRxfxxoIPV3W/nGrfN4TC8RA+QFsf/2JHKKn6BedXQzV6wVl4q1IuHu+4S7IIRYhJbTWLcAhKANZf0qlPZ0fedFS1vwVHdoDqE2P9aTrcyamELtkUZSQHOGbamiKMtNlaKa1d21Lmd9V66/rjPf9+Te9HGKFAhRUF/WVuC9+w3bLS6rveLOovx/1HdN9LcH0oJh1dqvSVvdgXPder591TReXF2I7oEWKQrWX5Rjnu5D78QC04ErxVqREDHuPSSWaS3EhYCuY6GwBfWjqwh1puiaMQOA908y4c87uVOCSUpEfopt8zeWTHnph++U/CCoyGSJEG6rs/yTM1b9zWFJC79b+u7KiraKlUIIxSRMoaLcomfnZs8t2Ve3r2BT5ab7HBZHFd0pdWdnzX5p8fjF+7dUbpx/oOHQ5xVVSTKbTF6PzVL2byun//yPO45fd7AxsDrJxqlY5rWX8ptpaXToec1egXLvJBSdV1IB1ABvyGKptxUQFRJHyNqeTrruwhc2o268kmA0Fv37w65wg3dCsMk7Xrb4sy2dwRSb5seOLg5LZyjNUR/KcNaQ6aqyJNlbdV8v3WkifPckZBTmmuuBV2OdmzsSJIaQhTgfbdGDbqgCuely/HqFW3YEUoO1nZPDTb5xoi2QaQkqTr1v4mFhFkEl2d4czHDWyCx3pSnDWWsXQkb8AdNiJnT3ZITOSQtAy+z5z9GehWT0C1nbs2mpnlVIkFsuxdeYG1nHVmcwOVTRVhiq6iiw+MJJoyIzyJlYTf5wrqcsMCnlkCXNWR/R4Ua9heA9kzC36zCtdwYngbdk8egVw+gWshAzgUv0rmb7RXhr8yIjYn/YFS5vKwxUdRRYOoNpuo+zo4nd7A2NSyoJTUo5ZEm2t0TkwVRtJXDPJCxes+5iPiqL5fs616Ebo1fIQkwBrkBH7zTA7gvwVk4ZmYjDqlUta5vpr2yfbm4PZNhAJPx6ZKelIzg+6URoatp+u8PiHZF5fNKG/95J2HTOPAKwTxbLTTrXoQujU8jaCqbr0Hmj9v2L6DpZiHu47/eHXeGjTQsDle0zHIq0JtR0x+BRZY673FeYscOS4mgadi991IHv/nwcOq9rBvhAFssRLw6JNqNPyELYgc+gbYCmG1X5+HYtH55jqzOYEjzUeH64tnOSMxae5ngl1VHvK8zYLrLdlcPy+r+ZQtcjucN/sA4SBfi7LJa6BcDowegSspae9hq0nR10w+ck/P6nEEPdT6kzmBI80LBMqe/Kc4wF83m4JNsbA7Myt5Dtrhqyj+DfJuDb5tF91VQX8KIslj6d64kYo03IS9Hya+mGBLl+FYGhRG15Q57QvvrlofqufKch4MGTbG8MzM3aKDNctYP+rLtMhO+aAlGIy64FXhkt65lHj5CFmAxcpXc1hxbQdWL24Mw3VZrk0aZF3uPNC5wSc+KFu0aJ8Z4T3nk5H9lt5sCgLKDDDnz3TYpK+qRR4/waHUIWwgnchI57LQE0ZeHfdBn2wWS5bPblBHbWXCZG6/xvvGExBcNzsjYF81OODGqG4KkMuv43U/fxMmi9cnUU6hkRo0XIV6HtfKgbISvKe59CDTrOHRYYUmzK3rqLA9Wd06Ky6mmskWJv8C8a947ZY2s/5/eggLwvn+AxfXeJBC0h/vPxHpMd/0IWYgZasjxd2bISb8O4c88Xl7cVeg80LLOFVduYSmsbbQSqOjVtn29m5jaXSaj9WkdNZoJfm4olCvPLR2Sx/EDnOkZEfAtZCA9wI+i7EubkdHz7F/c/5gqEHcrW6quDrf7sMZ3WNto4LJ3BxePfkmmOhn573U0evOsmRGVN+OuyWJZHoZ5hEe8OmkvQWcReF6EDC/s3z5p9OYH3Tq6Rhoijjz/ssW0ov9Z6snVWv8nzlnXiuridaEwTXSLWirj1h8SvkIWYis7zxQD7FxPqL/f0iZZ53g0Vq20h1WGY0jFCYjbtq7/YtbPmUq8qTX2aj1+px2KW6G1auoDFOtcxbOJTyEJYgAv0rqY1jUD9+LNNakU1q9uqr/QebFjmMiKz4oOqjumuD8uuD/jDrrMSAKYpWK9rjkqvPEesFbrtkz0S4lPI2tpij96V7F+M5IzYXW/IE3q/7MZQbecUwysdZ3QE0x3vn7yJRu+4s7aEuakZm0PfbJygLdC5UOc6hkX8CVnLQz1f72oacvG3Zp4+L13Xled//+SNJm8oJaGWFyYSIdVu2VT5SfuxpqLTxs1uFcstjVHplceLtWJyFOoZEnHltRZC3OOB79jBNg6q3oPH/wQFv4EbJJhs4P8DPP5JtIyO98B5T2jJ55kAFXvhL+9C+ufg6xJMCpg/Ce8+AR8C3A2Ln4RPqGCakM6udat4uafu8rYZ3j11lxiLHEYR+cmHuxbkfngqKGTdR1y2uYGLpQCSqOIOHmcHBWzmBiQmzPhZzePMoIFHWUMjhQCo2AiRxI+5G4DHuZ465gEwn1e5hu0ArGcmm7kBFYmPSuAOKeNjw/W4EbIQYoIJNtTAz7MhPAe+uhz2PQef+DP87gaovRVWHIQpO+HxVyD7i/DV9fBwIXh3QdJC6GgDswoiDcIVYJ8DxW/Az5IhfAE88BH8e/oU6le28I2LJrH5ptkcLm8r7BaxESc92shLPtJVlPuB+1ADqcXvc//tn+W+/y7Ayu/4Kvns4wCf4Fp+x2xqeZEV1DOFu3j8tEKe51KayOdr/A/vMo+9XM43+U/8WPhv7uXLPEwafh7iJ6fK+j3jqWeylPKOmFz4GcSVaW0FVxPYvGAKgi0f2gTIBjSHVAc406EV4Fdw8afg/ULwAixEy+6YAkoa2o4I7WCR3YkHNkJmJtTPE3QcXoC1MJNDWypZVNY60xDxKKaivdC9q3alF0CCaU4zyTldKCjYSKYNkHR1OzQDOHFp989plHE+s9gKQAPjyOYoVlSSCJJMJVuY033mx2V5mIGW7ysuiJtpFQltN8M78+EhC4Smw8EH4KAbnrgbvn0vhOzg2w4PAdR0bwMzEe5XwfQ1+EcxHAD4ENJugG+3QtYaeGEZtKVD6B7IfTQPd5oV/756FvpCVvbWX2yIeJRT2T7DNT7JVL1k/Ltv3v1PfibeIEgmh1jBQWw8wet8mzcJYcHHV7X75xSlpOMngws5DEAulWznU3TyNl3YaKaQNGoAWHFGWZ/j6qhfbD/EjWm9Xoirb4PfvAx/nAq+JfDVy2Dne7Dwu/DGXVB6PVxVDjnb4a/T4VtmULbBHzdB6vVw/y741+l87PDYBCnXwzdegd+eBx3/RzD3j3Y+JQQy2e480uKzjbt57s3/FcvrNogMrf5W1+vHXv7K/70s8JdcD75b3uHLoQJ2U8pClvIGSyjlGa6ijRy+yl9PvfFpVtFFGl/i6VPHnuIaKlmMjQ7sdJBJKTfxLr/lrtPKaiJV1skrYnG9ZxIfprUQzhfg05nQuAA6k0BZCbt2wrQamHgX2oZj34HtpTANIB1aLoHdSaBcBU2ZUPv+GZu1LYO2cVD9lLa7AF+eydYnPsNDD66YV2wz5zW5rK76M5tiMDo51HBols2S1NLku6LeYUGZm8U2qplGBxNZ0r1h3VK206LdP6eoYAlzu83qHj7PP7mfn3A3v0ECmdRTi+essjqYLdaK/Khc4ADEh5Bh7kxoLYWptWBTgC0wcyrUBMD5926B/hVmZWsLvvkE7N6G5nXcB54myLkQGtdDaj3aCqYj4CqFgsVo24a+lUnaydZZ3o0VszLL28pXzsuZtz5G12sQYVIcKc0dgY6pJ1pyU7dXX+ZtrWe+NYUqFJwc6X7A72EWbu3+AeAwOYRwsYSSU8fCCOq7l0fuZQIdTOQiDpKBt5+ydM2nPlhib1pr6XtuAVxXw+qtsNgE6kSoeA+e+CnMfRSuFSCd4P0T/M8qaFSAS+GmAzBHgHoLvPYfsP3fYdZvtLXLEhCfhPceh/WN2fjney1fb/aZJwFMz5j+yoV5F26P3YUbRJrXjr22uqGrYTGgpjpE6YrP+v/07GGWsotrAYkVL6v5HwpoBOBvrEbBwu28dKoQLxYe4QEAzPi5kr+xgEoA3qKon7KelcXybCdaFIkHIU8CVuldzbtLUzqeV653jd1slmMNKSdPfKfrR0Ulbql/5s29slhu1rmOcxIPpvVMvSvotNjCLyurbYaIxxJClFetdM1oyux35VQEmRHrrVpjK2QhXIDuzoIXMm/x+qXLCLscY6jSYirYfxMoHr33dXKgcwabgYh1j1yIzjtFHOTOrr3pibU1i8HgGdehOm1l95+1yEIHdLcsz0WshVygZ+F1LPHtsV9h6XR2GkIeo5iQpun1qYKG27t0rmqCWCtilnwidkIWIhlI06t4L1mhndxrK8sqP2v9qsHYorC93ETz9S46z9OzZxZEYZjYH7HskSfrWfgOfqAouMy1qbVxuc+wQfTICLTbHeGQSs29FhSXnuPlSTqWfU5iKWTdLrqcK71tTHf4Lf6wz+6L2zxLBtFBgJjobQiieizU3xXQsaqJYq2IyfqF2AhZCAeQq0fRIVzKQb5kA2hIaQjpUYfB6COvq14LmGhf4cQ3XS8xW4AJOpV9TmLVI+ehk7d6P3cFwrgtAPXJ9fGxIsQg5ozzNXX3lCZBzfdACr3ujZiY17ESsi5PrRYKA1WsOOU5bEpqMsbHBgB4wn6bK+zXHJ+hCXZartMrLdCY6pFzIl2gRMjd3CN6UvV4bd5QwBowhGxwivzOuo+HWo232Amn6jGjkSTWiqgnboy+kLXxcUqkiz3B9b4uJpxybNUn1xvjY4PTyPP2GmpJu5na7+q1n1PEO6qBiEWPHPGL9JMePsrnTgv6qE+pN7J+GJxGrq/5dAuta7GLroV6zC0bQh4Oe/h2UMV+2oKIZk+zYVYbnIZTCVqTg52nW2q13zEhzZF2fBlCHiqtTA80sPi0MUnAElBDllDc5CMziB+y/a2nj4vDmTaaI+74yhRrRVRX2sVCyBmRLOwInz8rUqfL3mWMjw36JDXYeXbv2/Jpa4Sno8zo4Ac6F9EVshBOIri7oo/McAMLzwpU73B26L1szWCUkhrsPPugkmal46JIj5UTWMgRvrhj3BwA81lOrQ5HhxEIYtAnyaGuvk3epjWRdo4aQh4MYRxqJZc6+vpfl6PL8Fgb9Ikn5OtbyMHJDnyFkQzdTI1gWQMyaoVcymr/mZ7qHrrs/Tx1DcY8DjVksajhvndtbDzb3zICjB55ICRClnJtv9NLPpvP8Fgb9EtKsKvviC7vAiehrEg5ShNayO6BTxmYKlb4g6T2KeSAJaCoJtXokQ36JS3YnzPULGj6bKSivRzRTMgXbSH3OaYdKse5qd8xcJe9n6etgUE3qcHO/jdEb1/hQHVEasP0iNzvgyHaQh5x7qxmZgU6ye/3AwpagnrvWm8wyrEr5+h0pd1My+pITUUloJC1HSVGPIdczqpzOiTCJqNDNjg3Fqmce1aj/dJI+ViilvQxmj2ynQgkE2hg0TljqFWTaswhG5wTsxzAaAvm2SK0xDFhhTwiOsgLBkg7p5AVkxHUZXBuLOog7pGO5ZFweiWgaR2BTdWruXjAp6QhZIOBGLBHBui4MBJBRVGbBo2mkEds8taxdMD2GkI2GAizVAcWqX+mXYfljboR650mBk0Qj9LO5AHNc8WkjJoP3yA2mAdydgFIm4muRSP1XkftXhw1Qq5lWaAnH9e5UIUx+2RwbgbVIwN0LB81ncKoEXINFw7qPMO0NhiIQY2RAbrOG2mWGaNH7o2KWTYzd1Beb7MRnWkwAKowDU5gSqqVwCS9EvRFlGguLhh2V9nE3ICCY1CufKsyulN1vXjoxdtbfC3zLCZLx+1Ft6/t/b+3Trx1ZWlr6Y03zr7xe+nO9M6Pyj86v6Sl5GoAkzAFlk5Y+uT0jOmVADuqd8zZV7/vZsA0zjPuo1UFq14HeOHgC3e0B9pnmE1mH8AFEy94bEbGjMo3T7x5VW1n7VIAKaUpoATG3TT7pn9Jc6Z53y55+/Kq9qqLAemyuqpWF65+3GFxjNrIm4BpCLd9+8VhssqGG8gUtXFeNIU87LWeDSwa9EPAqlhH9VrkgvSCjXaz/b3NlZvv7H28pqMmrdHbONtqsjb3HEt1pDZeW3jtL1Mdqd7t1dvnbKnacuv0jOkPKaoi9tXv+9zlUy7/TY4np+Xp/U//sKSlZM/UtKk1ADMzZz6/LG/Zzt7lXzXtqjeBNwG2VW2bf6z52BVpzjRvbWdtakVbxeWfnfvZYqfVGXruwHNf3Vq1dcklky7ZFIWPQxeCJuvgBeZdIOBvw61Kz32mTiOapvWwTZR2pgxanLawbVQLeX7O/GMem+esvXzXl69fs3j84hfoNe6amz23JNWR6gUoSC8oDSrBNIDDjYen2M32hryUvEab2abkuHO2HWs6tmCwbShrK1syPmn81p6/JdLkD/utYTVsUqRi89g8bSO6yBgTNA/BagtOGImJFzUhR69HllJFiCDDiLfuIH/Q7RztQu6LrVVb59vN9tbpGdMrPyr/qM9ztlVtW57qSN0P0B5oT7Vb7Kd6bpfV1dLka5ra8/eRpiPXHWs+9qlUR+rhVdNWvWi32E+Zyd6Q19bqb517+ZTL/xcg15Pbmp+S/+bzB59/yCRMoRRHysFF4xYd1O1io4DfZBu8E0pNshBOVrC0D8f5kpA9MsCQ046GsasDhWX2xhl0jgoH3mDxhry2o01HP3HplEv/3t85e2r3FFZ3VF+0cvLKF7sPnfUwE2hZIi+edPFLty+4/cGb59y8LqSEXO+Wvruq93m7a3fP99g8x9OcaV6AVn+rq76rvuj6Wdf/8AsLvnC/oiq2j8o/WhrBS4w6XVbH0B72gWnDTTag1/5SZxH3Qm5namgw88c9OEKOhHJbV3dUZwXCgcwXDr7w48d2PbYupIbSXj788o/qu+qTAY41HZuwq3bXbSsmr/hdujO9CyDZntwSCAfSe8rwhrxpDoujFSDbnd0mhMBusYcL0gs2tgXapvSur6KtYklect62nr8PNRya5bA4GjNcGZ1Ws1UZnzR+V6O3cVp0rl4fOi3OoQnZN2M4jlpJFIUc7ZQ4Z439BqKNqUP6EK2K1SxUoUqTTIieuSC9oKogveDenr8f2/XYuk/P/PS6dGd6Z3VHdfqGig1fXzx+8aOTUyfX95xTmFl4cmvV1uzK9sqMbHd2a11X3ZLlecv/DFDfVZ+S7c5uk1JS1lZW5LF5qnre1x5od3YEO2YU5Rb9pedYiiOl+WjT0anekNfmtDiD9V31M1MdqWXRun496LIM0WoLTBvOcM0ni2XU5pGjLeQhO0k6yR/yh2FTbErAFBiVQn7uwHNf7gh2zAirYc+jux792bS0aX9fMXnFhr7O3Vy5+ZNhNezeWbPzlp01OxEI5bYFt62zmCzqvJx5//t2ydt3AyLXk7thWvq0GoC3S97+UkgJeQDhtrkrri64+sme8nbX7i5KsaccdNvcpxyTs7Nml55sPbnjuQPP/QhQ3TZ3xUX5F63X+WPQlS7LEK22UM5w7qXWYbxn2AgZvYcGCFEAXDaUt2zm37yNLBzSNpUfFX7ka/G0nJW43sAgLEzqo9M/OTRhmjrCTP/8UDu9g7JY9u2Z1IFo91pDfkp5yRnymDfFmzJqYmQNoku71T30aVA1yYI6hLlnjaj2yHEvZD8ZQzb/07rSEm4KyiAyNDpShhdtFRo/VIdXVOfaoytkKcNAH5vv9E0Qj9JfEvpzkdaZZuS1NuiTOkfa8N4YHD/UkNSE7pEBWgZ7YhjnsJ6e7qDbalKNZVAGZ1PvGOZDXnUNZbgWksWyY1j1DJNYCLlusCfKETjVPX6PsbWqwWmoCLXFnjS8kEt1SAGJ9QOfElliIeTawZ6oMIRQujNI9aYaPbLBabRbXSFVDD646HSG1KkM+h6PFLEQcj2DXN6ljkDIaZ2Gw8vgdJrsKcN/uA+tRx4DQtYcXk2DOXUkPXJal+HwMjideucIdjqVg74XVcaIaQ2DfGKpI9iYIsmfZLOFbMY42eAUJ925w1+SKAd9LzbLYhn1+y5WQq4ezElyq7XUAAAYuklEQVQKI8v2kd2ebQjZAIB2qyvYYXMP/4YavGk9qHs70sRKyFXAwMnmsY8oQmtC84RRGW9tEHnK3DkjTE006JHayZHVMzxic6Nr4+TKAU/DMiIhZ3Zk2s2K2fBeG3A8eeLIfCbqoDpzP0OYXo0kseyxTg50gjLCzRtN0iQyOjKilqXBID7xmW2hBkfqyG4mOajMMyejuXSxN7EUchkDTEOJCCQhnNAywZiGGuNUuCPgKxncxgcnR1zPMImdkKUMMID32knjiNuX25prF9LYfmIsczwpAr4SS9NAPW0IzfcTE2LtDDpxrn86qR9x+yyqxZTalWqY12OUkDAr1a7Mke9TbKkfyLIrk8UyZv6YWAv5ONqTrE8cNEUkqCOvKc9YnzxGqXBnB4YfltkLa+NAZRwacR0jILZCljLEOXplC36TicCIn3ITmyY6LIpl1O6MYDB8dqcXRCYZo7X+XOW0ymJZE5F6hkmse2QY4Elmo2PEQjZLsym/Md8wr8cYjfZkf6MjdeRmNYCl4VxCPhyROkZA7IUsZQPQ2N+/7bRExFE1rXaa4fQaY+xKnx6hIVVIxeztT8gKcDQy9Qyf2AtZo99e2UFzRL4MR9hhyW7LHunG1QajBK/ZHjrpGTeojf8GxNx+LquwVBbLmN9X8SLkY/STzNtBU8R60Rk1MxIqeb1B/+xPnRKSQkQmhsDSdi4h741IHSMkPoSshWz2+YE4GNBbOGhSvan2ZG+yMVZOcMLCpBxImxKZ3hjA0txfZ1Iui2W/w8JoEh9C1jiIFqt6Gi5qIxqZVVBbYIyTE5zjSRP9IZMlcve2pd/h3c5+jked+BGyNhW1/8zDqRyN6M7l41vGO+whu7G8MUFRQe5KLxhZXPWZOA731ZlUymIZ9QQC/RFvWTT2A/PptfWqmzqLlY5wiCRLO+2Wb/Gt+xQUi0SapzN9x1rW/uO7fPeOGmpm2LD5AL7IFx+7jMtOra56gzcm/Zbf/uA6rvvjl/jSztJjpQVvh97+Qs//A+FA7vyc+X9aOnHp7if3PnmfIhUHQFgNJ7mt7tKb5978X6UtpbkbKjbc7gv58ienTv5/V0678q3ofSyJjz/stzx34Ln7JNIipTRnubN2fGL6J/7xwsEX7mgPtM8wm8w+gAsmXvDYjIwZp77bQw2HJq0vX/+Dednz/rgsb9nOE0kTfB0tJRPZ8h+3EepKByTLv/8IuUVNbPzlSqq3XUHIm8U1j3yP1Mlaaubt/30+5euvBsBkCbDgjieZcunHq/Pcu/p6MMRNbwzxJmQpgwixF1jc+3AqR4INLLZ48IQf5uGHM8kM+PCZv8bX7nuN1/YDrGLV81/iS2d9uEGC4hmeuSGX3AM9x77m+1rZjNkzHuh0dtpbfC2uFw698O/zcuYdBLhl/i2/6Dnv2QPP3jXOM243QJI9qeu8cec9XdJSslCnqx/T2M328GdmfeZhj80TCCkh8zMHnrnvQP2B/QAzM2c+vyxv2VnfraIqYlftrhuS7ckHQBsbb8qaY+fVu+6k4OrXmHPzIbyNdoRZM41zF55g6hX7WL/ue6cVlDyxkSt+9kuSJ3rZ99Qcdj92K1MufQgAc2sIa+OZVmGVLJZRz8t1LuLHtP6YvZyRxD6z2w9mwkQmmQEAP36zimoWZ28FfBq/5teXzWb2Theu0/IMzyufJwH21O05L9WRut9ldZ22lUhHoMPeHmgvLMot2g2Q6crsmJU1q8wkjHzZeiCEwGPzBABCasisStU8kNP5vZPvXZbryd1pNVk7APalTg34q7dNREoTc27WpjRdmQGcadp3O/XyCnKLzs4XN+NTJSRP9AIw6ZJSQr6Ps9g7jp45DFOBTcO7Sv2IPyFrHuzNvQ9lsvuU5RAkKG7l1h/fyZ2/nMSkQ9dwTSnAW7x13S3c8uD3+f6aTjotAIc4lHqYwwu/w3c+OLOazM5MR0ZHhq+6o3rJpJRJW8/8/+7a3QuT7cmHk+xJMZ8jHCsoqiKe2PPEj5/c++Qv053ph2ZnzS4FONJ05Lon9jzx4N+P/H1NIBywANR21qbWddYtXDFpxQcAIWFSdmVMd9B8PAezzcerX7+LF299gHd+cANKcPAO0z1/XU7KxI99Na6zJlMOymLZPNJrjTTxJ2QAKUvolfsohVKbGb8CYMMm/8bffvJ7fv/9Gmomf8iH47/BN156iqce/AN/WOfH7/oVv1oF8AiPrLmJm16w9ZON01nizPCFfBOKcosOnvm/yvbK8/NT8s8SuIF+mE1m+YUFX/jJmjlrvt8eaJ98ovnE+IsnXfzS7Qtuf/DmOTevCykh17ul764C+LDswzVFuUUvmE2a2VzjSA2HTRYTqmLC21DAoq88z7V/WYe/NYvt/33hoBpw6IVC6vdexAX3vHjqmHtn7+GnH9geuSuOHPEpZI2N9Eo8kEzJaabveMb78sk/uoENcwopbDNhwoMnvIIVG6uomgLQSOPkx3n8K2tYs66MskWv8drnH+fxop4y3gq/dUGOJWeb1Ww9zVxu9jW7vSHv5KLcon06X6NBH6Q4UnxpjrSjJS0lc7Ld2W1CCOwWe7ggvWBjW6BtCkBXsGvy1qqtX3ls12Prmv0ti9oqN93J7seLSBrXgiO1gnGLGrHYVbLn7aKtPH/ASk++P4EDz97G0rt/R0p+FwDCq2Cv6O3o2iaL5dB3c4wC8eXs6o2UzQhxEJgLkMledSdOjx27Mp7xvjbarCc5OetyLn/9CEdSCilsU1HZwpaiLLKqAJ7l2R/2FPddvnvHfObvvYM7dvcc28/+81eHVr9gD9qDAVvg1Be2p3bPeamO1L0Oi8NYMRUlmrxNHovJoqQ4Uny+kM/a7GueNSNjxuv1XfUp2e7sNiklZW1lRR6bpwrgzoV3/hC06aZHj79+u8xdtJ+iO3ajBAX7nnTRUuohbUonTUdnkjyx7JyV1+1LZ/t/fZ35tz3KxKUfTyk5TgSBnn22G4mDxRH9Eb9C1tgK5APJmew2l5Gb8mf+fKdEmiRSFFK4/TZu2/dFvvgvAQIeiRQZZFQUU/zkQAXvZneGD1/aZ/jM8aayJnXr9I+t6OqO6iUzM2e+3vv8+q765FeOvvIjVaoOQD6669Erbpp9U7Exho4Mzb7mlE2Vm+4ETFJKke3O3r5kwpJ9T+176l9CSsgDCLfNXXF1wdWnfbcHUqd41d7BH2abpPDa53nvgX9BAu7MchZ/fT0AG35+GdXbVhEOJPPmvQ+SkrePVb/+K7sf/SRK0M3+p29h/9MghMJn/roO174ei1AB3otVPq7BIGT8tk1DiGzg0ypm/slzUmLVZTiwZ9KervLMcrceZRvoQ7PN439x0gp7RBIH9EX+vQGcR+zAZlks4yKmuj/ieYysIWU9sNuEIlI5pluc9LzyeS6Pz2PEYY8SQsKsvD5hqVk3EQuvguO4DagB4t5XEv9C1tgBNObxtm7mg0maxPnHzzcZ+yqPDj7MWRDstLoiGr57GklbAgglRJyb1D2MDiFLqQLvjudDi4mgbose3EG3dX7ZfKNXjnOOJU3wnkie4Bz4zBGQ8k8zsFEWy84Bz40DRoeQAaRstRDYmMluXZ1Lec15rvHN47161mEwfNqsrsCHOQsit0SxL8wtIVyHSmWxjHnmj8EyeoQMIOXRZE4e07uaopNFDmfAGZfzhWOZsDCpr09YKhSTWd/71nGkHtigax0RZnQJGTjOmq31OFv1rMMszaalx5ciVCPHVzyxMWuuv83miewSxTMJWsJUXPJhLHNUD4f4n37qg3Gi67yH2Dgvg4CuX2pdSp1v67StDgZamWGgO3tTp3Ztzp6j7/SgIlQ+Wlwhq3Le0LUeHRh1PTJALe6jP2KZ9KPvTos5bTnOBWULfEhG39MugTiaPNGru4gBthQFqM45rns9OjAqhSwlHVV4ataxOKSgr8jym/Jds6pm9ZkY0EB/yl3Z3vdzF7p0r+hAQRdlE1SgVPe6dGBUCrmb3bvIcjzCAp/eYi6oK3BNq51meLKjTK0jzffmhCX6TjMBlE70snemG9gnZQS2AI0Bo1bIUlIP1LxDnusXLPKHB7nv5XCZXTXbldeY16VnHQYf02xL8r828QL9wi97ODq5i81FLiBAjPdvGgmjVsjd7ALYwHjn/2VJMKSzmIvKitw5rTlGz6wz7VZX4O95y63hSGbC7IsDBV3smNsz9t4vZf8bCsY7o1rIUlJJ93YzO8h2/JgLggH0DbFcXLLYmdmeaYhZJzoszuDf85abg2arvpsJ7JrVY04DhIED5zo93hnVQu7m1PriA2Q4fsCFYa+O3myTNIllx5a58hvyDTM7wjTYU/wvTFph9loc+i2vlUi2zfVyeFpvB9ohKc/OqT6aGPVClpIS4NRi8GOk2u/lIqUDq65JARaUL3DPqpzlNaamIkOpJ9f7cv5Fdl17YhXJpoV+jk/uLeIg3UO00cyoDAg5EyHIBq7rfSyHrtAv2UAqQf1WyAC1KbW+HVN32FWTOuofirFid9q0rq1Zs6MR7BGgOudML/gmKeN/meJAJMTN1+3BPm0ivw639btcImpx6bqaKbct17n8yPKQNayvBZCIqAj1g5wFXt1FHDIrvL802IeI2xjlY+MeEqJHBhACD7CGM9IXWVHUb7HXfxlVugYV+Ky+8IbCDarP7tM3FjhB0BIDnB+qcWXqu5KpOdnPh+db8PU57n5DSs6dz2uUkDBCBhCCJUCfO0Esp9r3HfbYXCi6jcHCprC6ddrWQFNyk/5BDKOYDosz+NrEC9B1AYSK5PA0H3tmOqHPTPfVUvKKbvVHmUQTsgW4EUju6//p+MMPsjU8jXZde4GS7BLvwQkHHdIkE2LoEkmOJk/0rs+e79B1KaLfFmL9YpXGdHs/ZyjAC1Ki6yq6aJJQQgYQglxgNfS9Ykkg5Rc47P0MJ1zmfs6JBJ32zuCWgi3S6/D2dzONKYImS/i93IWhMk+uvtZKTZaXDYvshM7p/d4sZXxsUB4pEk7IAEKwDJh3rnNm0Rz4IdtNenq1VaHKQxMO+UqyS5xjeSlkhSvL+17uQrvfYtdvakkRKrvmBDg2eaAHRR3wd5lg04aJKmQLcAOQcq7zXISU77MzsIgGXR1hbc624Pap28dc7xw0WcLrs+eHdM+v1eEK8MH5go4Bx9xhNJO6Tdf2xICEFDKAEOQA1zII83khDf6vsV9MoEs3oalClUfGHfGW5JQ4VJOqb/hhjJEgSz3jfOtz5tsDZpt+1xqwhtk3I6j1wgNs3aiREHPGfZGwQgYQgkWcsdfyubiUSu+dHLKmEdDN3A5YAsqhCYf8lRmVTikSzxlW7czwbciea26xJ+vnkQ6bFI5M9XOgwIky6IUVFVLyT93aFGMSWsgAQnA12rYzg8KMKq+jxHcTx21uwrrF/Hpt3tD+vP2hupS6hBg/N9mT/Ruy5lLrytBvRkAVKqUTfeye5SA4pJ6+HXhJShI21fFYELINuJ5+pqT6w0FYvY3Dvmsoc1rRr+dsc7YF9+ftV5qTmkfl3HOHxRnclDVHOZk0Tr/2SyTV2T62z7XiHXJS+jDwspScvcF5ApHwQgYQgnS0WOwh97CpBMJf5kDwIqqdek5XNXoa/QfyDtDu0neOO1J4zfbQ9ozC0JGUfKcUgxqfDo+mVB/b5ppoSR2u/+I9KdE9hXKsGRNCBhCCAuCy4b4/HX/4OkqCV1BhSyKkm8nd5mwLluSUhKvTqu3x5hRTQVa7Mn37U6eayj05+j1wFJNKZY6fQ9PMIxAwwAEpR1d+6uEyZoQMQ3d+9VkGUl5Etf86SplBq27mZNgUVivTK/2l2aWmTmdnTHtpr9keOpKSF9yfOsXu03OtcKczyLHJYY5PchAecXaQMuDNRJsv7o8xJWQAIbiQ7s3TR0oW3tDVlAdXUmnNxq+blzYWvXTUet+gJUxlboBjky00j6j37U0t8JqUjJkVaWNOyABCcBlQEMkyp9EauIZy5XzqdJu+CpvCakNyQ6AmtUY2JjVaArbIJugPCrNS68wIlHlyKPWM0y8SK2RWqMsMcCJfUJ3tGOQc8GBpRovcGlNb/oxVIZuAq4GJepSfjj+8kIbgAhqZTbMlC5/VpIOjrMvWFapNqw3VpdSJFneLbai9tQqy2Z4cqHBnKyc9uZYGR5o+ATF+W4im1BB1mVCTZaE9SS/rpQPNQz3mcqqNSSHDqTDOa4BxetflIqQsoDFYRKM6lybzBDrtkfaAq0KVze7mQG1qrdLqbjV12bssQevpceQhYVbabO5Qkz1FKXdnmyvd2baQHpkquxxBGtLD1GVCbeZwpoyGQyfwipS0R6GuuGPMChlOiflKIC+a9doJqzNoDY6nS83GJzPwi0x8Ip2AKZmgyUPQMlyhKyBbsYcacCqlZlv4gDtMmUeKxiyL7ExXLXh8Jpx+K+YRpCYKWsIEbAp+u4rXIfE6JV6noNMlaEq1EdBxcUTftAGvSsmo2MtYD8a0kOGUmX0ZMDXWbelBIGUKQSULn5KNV/UQkkHMBDGJIGbCmEQQE0HMovuYCCMIYRZt2MxyMGNOW1DBrEjMqsSkyu7f6XWMU38rZoHXYaLLacLnsCB1Tho/NJrRRDymt/UZ80IGEFqI5MXAzFi3xWBI1AGvJ3Lo5WAxhNwLIVgKLIh1OwwGRSXaPPGYmWI6F4aQz0AIZqD1znEVVWVwGnuBLWMl2GMwGELuAyHIAq4C9N+T12AohIEPpWRU7mGsJ4aQ+0EInGge7dxYt+Vj1lwO71wMQsK4KnjvcfjLNHj4RlAskFcGG54AlwrfPB+evVp7ny0AP3sSbq2ERgvMuw/CFlDNsGQHvP4P7byfFfZdVlzQiWZKN8a6IfGIIeRz0O3RvoAIhXSOjA2p8In74VgxZIdgzldh2X545lp48mG4th6uvhYmNMFfNsAjU+GqWij0wr/OgT+uhuqHtASS1XbIC0CHGQrvgweega+chPSf9l1WzKkC3h3rnulzkXAZKiKJlKhSshF4HeLhJlJN0GQFrwmCNvAEwRzWhAew6iB8sEj7/dslmogBPlcK7Wna72Y0EQN0mUExa7fBQXf/ZcUMFS3j5ZifXhoIQ8iDQErKgeeBk7FrxfJW+OSbMP8hyPgFOH3wq+2aefznSdo5L5wHrWlnv/eHy2Hm/o//9gvI/jHk/RLmHIK7SmFu5+DKihpNaFk9EiptrV4YQh4kUuKTkjeBdyEW85ZHXLC5CLb/EOrv13rkby+Fn/4J1q6BcT8Alx9MZ4xpf14I718E//Pix8ccEup/Age/Dycmw9PjtZ56oLKiggrsQBNxQmf1iCSGkIdIt8f0GeAQRHP64w+zILMRFnRCkgIrd8GuafDNEqj4BdT8FC45Bln1H7/nyQmw7jZ47Hcwp4/9nKf7YM5ReG6O9ve5yooKVWjpandISbw42UYFhpCHgZT4pWQ98BLa2tcoMKMZSqdCrU1zWG2ZCVNqYFeS9v8WC/x5FXzhA+3v99Phm1+Hnzz68bgXYI8HjnUnRKi3wr5ZMKv7GvorS3fa0TzSr0pJS5TqTCgMr3UE6E4jtBTd552vXg1bF2sm78QKeO8JuP7TsHc+SAFXfgDPvKOdu/g22L8Ikpu1v00K1K7Teul77gRp0t6zdDu8+qp2zqU39F2WboSBncA+KVF0riuhMYQcIbpXUs1CC/HUdeeKBCAMHAT2jsW1w3pgCDnCCIEZKASKAE+MmxNvBNE2Ft8nJf5YNyaRMISsE93BJNPReujUGDcn1viB/cD+sZaCJ1oYQo4CQjAOzeyewthajFGF5t0/aXih9cUQchQRAjtaLz0LiGWwhZ54gSPAkbGadicWGEKOEd27X0zufmXGtDEjpwMtj/RJoMZYXhh9DCHHAULgQRP0JLTVVvFufku0EMoyNLPZiMCKMYaQ44xuJ1kWkIMm6lwg1vtBhYB6tNQ6tUC94bSKLwwhjwKEIAVIB1LQPOCp3b9HOg91GC0jZRvQ2uvVZJjL8Y0h5FGMEDjQoskc3S97r9+taCl1e8JwJdqCBAVtOsiPtvij53ffWE4nO9oxhGxgkAAYiyYMDBIAQ8gGBgmAIWQDgwTAELKBQQJgCNnAIAEwhGxgkAAYQjYwSAAMIRsYJACGkBMcIcTnhRDbhRCdQogaIcQ/hRAXxbpdBpHFEHICI4T4F+A3wDq0RRj5wO+BT8eyXQaRxwjRTFCEECloGTrulFI+F+v2GOiL0SMnLsvQFk+8FOuGGOiPIeTEJQNolFKGY90QA/0xhJy4NAGZQghLrBtioD+GkBOXTWjrjK+LdUMM9McQcoIipWwDHgR+J4S4TgjhEkJYhRDXCCF+Huv2GUQWw2ud4AghbgHuQUvB24G2Zem/Syk3xrRhBhHFELKBQQJgmNYGBgmAIWQDgwTAELKBQQJgCNnAIAEwhGxgkAAYQjYwSAAMIRsYJACGkA0MEgBDyAYGCcD/Bze43FAoNjufAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a21e41fd0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from matplotlib_venn import venn3\n",
    "\n",
    "temp12=pd.merge(df_train1,df_train2,how=\"inner\",on=[\"UNIQUE_ID\",\"LABEL_CODE\",\"TIME_STAMP\"])\n",
    "temp13=pd.merge(df_train1,df_train3,how=\"inner\",on=[\"UNIQUE_ID\",\"LABEL_CODE\",\"TIME_STAMP\"])\n",
    "temp23=pd.merge(df_train2,df_train3,how=\"inner\",on=[\"UNIQUE_ID\",\"LABEL_CODE\",\"TIME_STAMP\"])\n",
    "temp123=pd.merge(temp12,temp23,how=\"inner\",on=[\"UNIQUE_ID\",\"LABEL_CODE\",\"TIME_STAMP\"])\n",
    "\n",
    " \n",
    "# Make the diagram\n",
    "venn3(subsets = (len(df_train1.index)-len(temp12.index)-len(temp13.index)+len(temp123.index), len(df_train2.index)-len(temp12.index)-len(temp23.index)+len(temp123.index), len(temp12.index)-len(temp123), len(df_train3.index)-len(temp13.index)-len(temp23.index)+len(temp123.index),len(temp13.index)-len(temp123.index),len(temp23.index)-len(temp123.index),len(temp123)))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df_train1' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-24-66d445da6a7c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#Save locally.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mcomplete_tensor_nocov\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"complete_tensor.csv\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#Full data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdf_train1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"complete_tensor_train1.csv\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#Train data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      4\u001b[0m \u001b[0mdf_val1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"complete_tensor_val1.csv\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#Validation data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mdf_train2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutfile_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"complete_tensor_train2.csv\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#Train data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'df_train1' is not defined"
     ]
    }
   ],
   "source": [
    "#Save locally.\n",
    "complete_tensor_nocov.to_csv(outfile_path+\"complete_tensor.csv\") #Full data\n",
    "df_train1.to_csv(outfile_path+\"complete_tensor_train1.csv\") #Train data\n",
    "df_val1.to_csv(outfile_path+\"complete_tensor_val1.csv\") #Validation data\n",
    "df_train2.to_csv(outfile_path+\"complete_tensor_train2.csv\") #Train data\n",
    "df_val2.to_csv(outfile_path+\"complete_tensor_val2.csv\") #Validation data\n",
    "df_train3.to_csv(outfile_path+\"complete_tensor_train3.csv\") #Train data\n",
    "df_val3.to_csv(outfile_path+\"complete_tensor_val3.csv\") #Validation data\n",
    "df_test.to_csv(outfile_path+\"complete_tensor_test.csv\") #Test data\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Covariates dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "#We create a data set with the covariates\n",
    "covariates=complete_tensor.groupby(\"UNIQUE_ID\").nth(0)[list(hot_encodings.columns)]\n",
    "covariates.to_csv(outfile_path+\"complete_covariates.csv\") #save locally"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Creation of the dataset for LSTM operation\n",
    "\n",
    "We split the data patient-wise and provide imputation methods."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Unique_ids of train and test\n",
    "test_prop=0.1\n",
    "val_prop=0.2\n",
    "sorted_unique_ids=np.sort(unique_ids)\n",
    "train_unique_ids=sorted_unique_ids[:int((1-test_prop)*(1-val_prop)*len(unique_ids))]\n",
    "val_unique_ids=sorted_unique_ids[int((1-test_prop)*(1-val_prop)*len(unique_ids)):int((1-test_prop)*len(unique_ids))]\n",
    "test_unique_ids=sorted_unique_ids[int((1-test_prop)*len(unique_ids)):]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Death tags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "death_tags_train_df=death_tags_df.loc[death_tags_df[\"UNIQUE_ID\"].isin(list(train_unique_ids))].sort_values(by=\"UNIQUE_ID\")\n",
    "death_tags_val_df=death_tags_df.loc[death_tags_df[\"UNIQUE_ID\"].isin(list(val_unique_ids))].sort_values(by=\"UNIQUE_ID\")\n",
    "death_tags_test_df=death_tags_df.loc[death_tags_df[\"UNIQUE_ID\"].isin(list(test_unique_ids))].sort_values(by=\"UNIQUE_ID\")\n",
    "\n",
    "death_tags_train_df.to_csv(outfile_path+\"LSTM_death_tags_train.csv\")\n",
    "death_tags_val_df.to_csv(outfile_path+\"LSTM_death_tags_val.csv\")\n",
    "death_tags_test_df.to_csv(outfile_path+\"LSTM_death_tags_test.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Tensor split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Create a segmented tensor (by patients)\n",
    "complete_tensor_train=complete_tensor_nocov.loc[complete_tensor_nocov[\"UNIQUE_ID\"].isin(list(train_unique_ids))].sort_values(by=\"UNIQUE_ID\")\n",
    "complete_tensor_val=complete_tensor_nocov.loc[complete_tensor_nocov[\"UNIQUE_ID\"].isin(list(val_unique_ids))].sort_values(by=\"UNIQUE_ID\")\n",
    "complete_tensor_test=complete_tensor_nocov.loc[complete_tensor_nocov[\"UNIQUE_ID\"].isin(list(test_unique_ids))].sort_values(by=\"UNIQUE_ID\")\n",
    "\n",
    "complete_tensor_train.to_csv(outfile_path+\"LSTM_tensor_train.csv\") \n",
    "complete_tensor_val.to_csv(outfile_path+\"LSTM_tensor_val.csv\") \n",
    "complete_tensor_test.to_csv(outfile_path+\"LSTM_tensor_test.csv\") "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Covariates split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "covariates_train=covariates.loc[covariates.index.isin(train_unique_ids)].sort_index()\n",
    "covariates_val=covariates.loc[covariates.index.isin(val_unique_ids)].sort_index()\n",
    "covariates_test=covariates.loc[covariates.index.isin(test_unique_ids)].sort_index()\n",
    "\n",
    "covariates_train.to_csv(outfile_path+\"LSTM_covariates_train.csv\") #save locally\n",
    "covariates_val.to_csv(outfile_path+\"LSTM_covariates_val.csv\") #save locally\n",
    "covariates_test.to_csv(outfile_path+\"LSTM_covariates_test.csv\") #save locally"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Mean Imputation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Vector containing the mean_values of each dimension.\n",
    "mean_dims=complete_tensor_train.groupby(\"LABEL_CODE\")[\"MEAN\"].mean()\n",
    "mean_dims.to_csv(outfile_path+\"mean_features.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset for GRU_D (continuous time operation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {},
   "outputs": [],
   "source": [
    "#map the admission id to the unique id\n",
    "complete_df10[\"UNIQUE_ID\"]=complete_df10[\"HADM_ID\"].map(d)\n",
    "complete_df10[\"TIME_CONTINUOUS\"]=complete_df10[\"TIME\"]/10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Add a column with the mean and std of each different measurement type and then normalize them.\n",
    "d_mean=dict(complete_df10.groupby(\"LABEL_CODE\")[\"VALUENUM\"].mean())\n",
    "complete_df10[\"MEAN\"]=complete_df10[\"LABEL_CODE\"].map(d_mean)\n",
    "d_std=dict(complete_df10.groupby(\"LABEL_CODE\")[\"VALUENUM\"].std())\n",
    "complete_df10[\"STD\"]=complete_df10[\"LABEL_CODE\"].map(d_std)\n",
    "complete_df10[\"VALUENORM\"]=(complete_df10[\"VALUENUM\"]-complete_df10[\"MEAN\"])/complete_df10[\"STD\"]\n",
    "\n",
    "assert(len(complete_df10.loc[complete_df10[\"VALUENORM\"]==0].index)==0) #Make sure that there are no zeros. (Zeros can be used to represent missing values then)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_time_bins=complete_df10.groupby(\"UNIQUE_ID\")[\"TIME\"].nunique().max() #This is the maximal number of different time steps in a patient time series.\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=complete_df10.sort_values(by=[\"UNIQUE_ID\",\"TIME_CONTINUOUS\"]).copy()\n",
    "a.reset_index(inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "metadata": {},
   "outputs": [],
   "source": [
    "b=a.assign(Time_order=a.groupby('UNIQUE_ID').TIME.rank(method='dense') - 1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>HADM_ID</th>\n",
       "      <th>TIME</th>\n",
       "      <th>LABEL_CODE</th>\n",
       "      <th>VALUENUM</th>\n",
       "      <th>UNIQUE_ID</th>\n",
       "      <th>MEAN</th>\n",
       "      <th>STD</th>\n",
       "      <th>VALUENORM</th>\n",
       "      <th>TIME_CONTINUOUS</th>\n",
       "      <th>Time_order</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>837734</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>29</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.080147</td>\n",
       "      <td>4.302087</td>\n",
       "      <td>0.715966</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>837736</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30</td>\n",
       "      <td>25.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24.763740</td>\n",
       "      <td>4.615921</td>\n",
       "      <td>0.051184</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>837738</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>31</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.609944</td>\n",
       "      <td>2.035778</td>\n",
       "      <td>-0.790825</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>837740</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32</td>\n",
       "      <td>34.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>41.428204</td>\n",
       "      <td>9.528942</td>\n",
       "      <td>-0.779541</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>837742</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>33</td>\n",
       "      <td>7.47</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.145331</td>\n",
       "      <td>0.648851</td>\n",
       "      <td>0.500375</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>837744</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>34</td>\n",
       "      <td>180.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>188.338393</td>\n",
       "      <td>119.567732</td>\n",
       "      <td>-0.069738</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>837746</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17</td>\n",
       "      <td>11.90</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.006425</td>\n",
       "      <td>2.161977</td>\n",
       "      <td>0.413314</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>837749</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5</td>\n",
       "      <td>148.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>135.704576</td>\n",
       "      <td>93.624455</td>\n",
       "      <td>0.131327</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>837735</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>29</td>\n",
       "      <td>-1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-1.080147</td>\n",
       "      <td>4.302087</td>\n",
       "      <td>0.018630</td>\n",
       "      <td>2.3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>837737</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>30</td>\n",
       "      <td>25.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24.763740</td>\n",
       "      <td>4.615921</td>\n",
       "      <td>0.051184</td>\n",
       "      <td>2.3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>837739</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>31</td>\n",
       "      <td>1.40</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.609944</td>\n",
       "      <td>2.035778</td>\n",
       "      <td>-0.594340</td>\n",
       "      <td>2.3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>837741</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>32</td>\n",
       "      <td>42.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>41.428204</td>\n",
       "      <td>9.528942</td>\n",
       "      <td>0.060006</td>\n",
       "      <td>2.3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>837743</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>33</td>\n",
       "      <td>7.36</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.145331</td>\n",
       "      <td>0.648851</td>\n",
       "      <td>0.330845</td>\n",
       "      <td>2.3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>837745</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>34</td>\n",
       "      <td>224.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>188.338393</td>\n",
       "      <td>119.567732</td>\n",
       "      <td>0.298254</td>\n",
       "      <td>2.3</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>837657</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0</td>\n",
       "      <td>9.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.333616</td>\n",
       "      <td>3.958092</td>\n",
       "      <td>-1.347522</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>837661</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>1</td>\n",
       "      <td>26.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>23.683217</td>\n",
       "      <td>4.552615</td>\n",
       "      <td>0.508891</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>837666</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>3</td>\n",
       "      <td>110.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>105.072327</td>\n",
       "      <td>6.554967</td>\n",
       "      <td>0.751746</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>837670</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.90</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.350306</td>\n",
       "      <td>1.445883</td>\n",
       "      <td>-0.311440</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>837674</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>5</td>\n",
       "      <td>170.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>135.704576</td>\n",
       "      <td>93.624455</td>\n",
       "      <td>0.366308</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>837678</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>6</td>\n",
       "      <td>2.90</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.001926</td>\n",
       "      <td>0.519920</td>\n",
       "      <td>1.727330</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>837683</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>8</td>\n",
       "      <td>4.20</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.151821</td>\n",
       "      <td>0.713661</td>\n",
       "      <td>0.067509</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>837688</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>9</td>\n",
       "      <td>141.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>138.633262</td>\n",
       "      <td>5.323731</td>\n",
       "      <td>0.444564</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>837692</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>13</td>\n",
       "      <td>13.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24.828644</td>\n",
       "      <td>21.361991</td>\n",
       "      <td>-0.553724</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>837696</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>16</td>\n",
       "      <td>33.40</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32.246866</td>\n",
       "      <td>5.880561</td>\n",
       "      <td>0.196092</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>837700</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>17</td>\n",
       "      <td>11.60</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.006425</td>\n",
       "      <td>2.161977</td>\n",
       "      <td>0.274552</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>837704</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>19</td>\n",
       "      <td>31.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>30.393121</td>\n",
       "      <td>2.489034</td>\n",
       "      <td>0.444702</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>837708</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>20</td>\n",
       "      <td>34.60</td>\n",
       "      <td>0.0</td>\n",
       "      <td>34.072032</td>\n",
       "      <td>1.555948</td>\n",
       "      <td>0.339322</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>837712</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>21</td>\n",
       "      <td>91.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>89.271448</td>\n",
       "      <td>6.537488</td>\n",
       "      <td>0.264406</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>837716</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>24</td>\n",
       "      <td>286.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>212.095378</td>\n",
       "      <td>110.493716</td>\n",
       "      <td>0.668858</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>837720</td>\n",
       "      <td>137552.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>25</td>\n",
       "      <td>13.40</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.662763</td>\n",
       "      <td>1.970897</td>\n",
       "      <td>-0.640705</td>\n",
       "      <td>3.6</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225010</th>\n",
       "      <td>1128786</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>8</td>\n",
       "      <td>3.70</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.151821</td>\n",
       "      <td>0.713661</td>\n",
       "      <td>-0.633104</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225011</th>\n",
       "      <td>1128788</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>9</td>\n",
       "      <td>140.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>138.633262</td>\n",
       "      <td>5.323731</td>\n",
       "      <td>0.256726</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225012</th>\n",
       "      <td>1128790</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>13</td>\n",
       "      <td>8.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24.828644</td>\n",
       "      <td>21.361991</td>\n",
       "      <td>-0.787784</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225013</th>\n",
       "      <td>1128794</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>16</td>\n",
       "      <td>42.70</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.246866</td>\n",
       "      <td>5.880561</td>\n",
       "      <td>1.777574</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225014</th>\n",
       "      <td>1128796</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>17</td>\n",
       "      <td>14.30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.006425</td>\n",
       "      <td>2.161977</td>\n",
       "      <td>1.523409</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225015</th>\n",
       "      <td>1128799</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>19</td>\n",
       "      <td>27.40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>30.393121</td>\n",
       "      <td>2.489034</td>\n",
       "      <td>-1.202523</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225016</th>\n",
       "      <td>1128801</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>20</td>\n",
       "      <td>33.40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.072032</td>\n",
       "      <td>1.555948</td>\n",
       "      <td>-0.431912</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225017</th>\n",
       "      <td>1128803</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>21</td>\n",
       "      <td>82.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>89.271448</td>\n",
       "      <td>6.537488</td>\n",
       "      <td>-1.112269</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225018</th>\n",
       "      <td>1128808</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>24</td>\n",
       "      <td>266.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>212.095378</td>\n",
       "      <td>110.493716</td>\n",
       "      <td>0.487852</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225019</th>\n",
       "      <td>1128810</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>25</td>\n",
       "      <td>13.80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.662763</td>\n",
       "      <td>1.970897</td>\n",
       "      <td>-0.437752</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225020</th>\n",
       "      <td>1128812</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>26</td>\n",
       "      <td>5.21</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.706550</td>\n",
       "      <td>0.715238</td>\n",
       "      <td>2.102027</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225021</th>\n",
       "      <td>1128814</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>27</td>\n",
       "      <td>9.20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.987718</td>\n",
       "      <td>9.511521</td>\n",
       "      <td>-0.293089</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225022</th>\n",
       "      <td>1128817</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>28</td>\n",
       "      <td>79.80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>39.867142</td>\n",
       "      <td>24.700711</td>\n",
       "      <td>1.616668</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225023</th>\n",
       "      <td>1128820</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>35</td>\n",
       "      <td>15.70</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.718341</td>\n",
       "      <td>7.154602</td>\n",
       "      <td>-0.002564</td>\n",
       "      <td>33.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225024</th>\n",
       "      <td>432561</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>350.0</td>\n",
       "      <td>16</td>\n",
       "      <td>26.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.246866</td>\n",
       "      <td>5.880561</td>\n",
       "      <td>-1.062291</td>\n",
       "      <td>35.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225025</th>\n",
       "      <td>1371756</td>\n",
       "      <td>161767.0</td>\n",
       "      <td>355.0</td>\n",
       "      <td>10</td>\n",
       "      <td>73.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>119.685420</td>\n",
       "      <td>151.288875</td>\n",
       "      <td>-0.308585</td>\n",
       "      <td>35.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225026</th>\n",
       "      <td>1371757</td>\n",
       "      <td>161767.0</td>\n",
       "      <td>355.0</td>\n",
       "      <td>11</td>\n",
       "      <td>18.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>317.656061</td>\n",
       "      <td>1277.784289</td>\n",
       "      <td>-0.234512</td>\n",
       "      <td>35.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225027</th>\n",
       "      <td>1371758</td>\n",
       "      <td>161767.0</td>\n",
       "      <td>355.0</td>\n",
       "      <td>12</td>\n",
       "      <td>1.30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.100389</td>\n",
       "      <td>4.622310</td>\n",
       "      <td>-0.173158</td>\n",
       "      <td>35.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225028</th>\n",
       "      <td>1371793</td>\n",
       "      <td>161767.0</td>\n",
       "      <td>355.0</td>\n",
       "      <td>36</td>\n",
       "      <td>17.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>267.646865</td>\n",
       "      <td>1045.432709</td>\n",
       "      <td>-0.239754</td>\n",
       "      <td>35.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225029</th>\n",
       "      <td>1214823</td>\n",
       "      <td>154649.0</td>\n",
       "      <td>365.0</td>\n",
       "      <td>16</td>\n",
       "      <td>40.20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.246866</td>\n",
       "      <td>5.880561</td>\n",
       "      <td>1.352445</td>\n",
       "      <td>36.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225030</th>\n",
       "      <td>1308617</td>\n",
       "      <td>158934.0</td>\n",
       "      <td>370.0</td>\n",
       "      <td>16</td>\n",
       "      <td>39.60</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.246866</td>\n",
       "      <td>5.880561</td>\n",
       "      <td>1.250414</td>\n",
       "      <td>37.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225031</th>\n",
       "      <td>1128818</td>\n",
       "      <td>150709.0</td>\n",
       "      <td>401.0</td>\n",
       "      <td>28</td>\n",
       "      <td>84.80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>39.867142</td>\n",
       "      <td>24.700711</td>\n",
       "      <td>1.819092</td>\n",
       "      <td>40.1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225032</th>\n",
       "      <td>432573</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>415.0</td>\n",
       "      <td>29</td>\n",
       "      <td>-7.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.080147</td>\n",
       "      <td>4.302087</td>\n",
       "      <td>-1.376042</td>\n",
       "      <td>41.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225033</th>\n",
       "      <td>432575</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>415.0</td>\n",
       "      <td>30</td>\n",
       "      <td>18.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24.763740</td>\n",
       "      <td>4.615921</td>\n",
       "      <td>-1.465307</td>\n",
       "      <td>41.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225034</th>\n",
       "      <td>432578</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>415.0</td>\n",
       "      <td>32</td>\n",
       "      <td>31.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>41.428204</td>\n",
       "      <td>9.528942</td>\n",
       "      <td>-1.094372</td>\n",
       "      <td>41.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225035</th>\n",
       "      <td>432580</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>415.0</td>\n",
       "      <td>33</td>\n",
       "      <td>7.35</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.145331</td>\n",
       "      <td>0.648851</td>\n",
       "      <td>0.315433</td>\n",
       "      <td>41.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225036</th>\n",
       "      <td>432582</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>415.0</td>\n",
       "      <td>34</td>\n",
       "      <td>64.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>188.338393</td>\n",
       "      <td>119.567732</td>\n",
       "      <td>-1.039899</td>\n",
       "      <td>41.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225037</th>\n",
       "      <td>432584</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>415.0</td>\n",
       "      <td>17</td>\n",
       "      <td>8.70</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.006425</td>\n",
       "      <td>2.161977</td>\n",
       "      <td>-1.066813</td>\n",
       "      <td>41.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225038</th>\n",
       "      <td>432585</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>440.0</td>\n",
       "      <td>17</td>\n",
       "      <td>8.80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.006425</td>\n",
       "      <td>2.161977</td>\n",
       "      <td>-1.020559</td>\n",
       "      <td>44.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3225039</th>\n",
       "      <td>432587</td>\n",
       "      <td>119225.0</td>\n",
       "      <td>440.0</td>\n",
       "      <td>5</td>\n",
       "      <td>157.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>135.704576</td>\n",
       "      <td>93.624455</td>\n",
       "      <td>0.227456</td>\n",
       "      <td>44.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3225040 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           index   HADM_ID   TIME  LABEL_CODE  VALUENUM  UNIQUE_ID  \\\n",
       "0         837734  137552.0    0.0          29      2.00        0.0   \n",
       "1         837736  137552.0    0.0          30     25.00        0.0   \n",
       "2         837738  137552.0    0.0          31      1.00        0.0   \n",
       "3         837740  137552.0    0.0          32     34.00        0.0   \n",
       "4         837742  137552.0    0.0          33      7.47        0.0   \n",
       "5         837744  137552.0    0.0          34    180.00        0.0   \n",
       "6         837746  137552.0    0.0          17     11.90        0.0   \n",
       "7         837749  137552.0    0.0           5    148.00        0.0   \n",
       "8         837735  137552.0   23.0          29     -1.00        0.0   \n",
       "9         837737  137552.0   23.0          30     25.00        0.0   \n",
       "10        837739  137552.0   23.0          31      1.40        0.0   \n",
       "11        837741  137552.0   23.0          32     42.00        0.0   \n",
       "12        837743  137552.0   23.0          33      7.36        0.0   \n",
       "13        837745  137552.0   23.0          34    224.00        0.0   \n",
       "14        837657  137552.0   36.0           0      9.00        0.0   \n",
       "15        837661  137552.0   36.0           1     26.00        0.0   \n",
       "16        837666  137552.0   36.0           3    110.00        0.0   \n",
       "17        837670  137552.0   36.0           4      0.90        0.0   \n",
       "18        837674  137552.0   36.0           5    170.00        0.0   \n",
       "19        837678  137552.0   36.0           6      2.90        0.0   \n",
       "20        837683  137552.0   36.0           8      4.20        0.0   \n",
       "21        837688  137552.0   36.0           9    141.00        0.0   \n",
       "22        837692  137552.0   36.0          13     13.00        0.0   \n",
       "23        837696  137552.0   36.0          16     33.40        0.0   \n",
       "24        837700  137552.0   36.0          17     11.60        0.0   \n",
       "25        837704  137552.0   36.0          19     31.50        0.0   \n",
       "26        837708  137552.0   36.0          20     34.60        0.0   \n",
       "27        837712  137552.0   36.0          21     91.00        0.0   \n",
       "28        837716  137552.0   36.0          24    286.00        0.0   \n",
       "29        837720  137552.0   36.0          25     13.40        0.0   \n",
       "...          ...       ...    ...         ...       ...        ...   \n",
       "3225010  1128786  150709.0  338.0           8      3.70        NaN   \n",
       "3225011  1128788  150709.0  338.0           9    140.00        NaN   \n",
       "3225012  1128790  150709.0  338.0          13      8.00        NaN   \n",
       "3225013  1128794  150709.0  338.0          16     42.70        NaN   \n",
       "3225014  1128796  150709.0  338.0          17     14.30        NaN   \n",
       "3225015  1128799  150709.0  338.0          19     27.40        NaN   \n",
       "3225016  1128801  150709.0  338.0          20     33.40        NaN   \n",
       "3225017  1128803  150709.0  338.0          21     82.00        NaN   \n",
       "3225018  1128808  150709.0  338.0          24    266.00        NaN   \n",
       "3225019  1128810  150709.0  338.0          25     13.80        NaN   \n",
       "3225020  1128812  150709.0  338.0          26      5.21        NaN   \n",
       "3225021  1128814  150709.0  338.0          27      9.20        NaN   \n",
       "3225022  1128817  150709.0  338.0          28     79.80        NaN   \n",
       "3225023  1128820  150709.0  338.0          35     15.70        NaN   \n",
       "3225024   432561  119225.0  350.0          16     26.00        NaN   \n",
       "3225025  1371756  161767.0  355.0          10     73.00        NaN   \n",
       "3225026  1371757  161767.0  355.0          11     18.00        NaN   \n",
       "3225027  1371758  161767.0  355.0          12      1.30        NaN   \n",
       "3225028  1371793  161767.0  355.0          36     17.00        NaN   \n",
       "3225029  1214823  154649.0  365.0          16     40.20        NaN   \n",
       "3225030  1308617  158934.0  370.0          16     39.60        NaN   \n",
       "3225031  1128818  150709.0  401.0          28     84.80        NaN   \n",
       "3225032   432573  119225.0  415.0          29     -7.00        NaN   \n",
       "3225033   432575  119225.0  415.0          30     18.00        NaN   \n",
       "3225034   432578  119225.0  415.0          32     31.00        NaN   \n",
       "3225035   432580  119225.0  415.0          33      7.35        NaN   \n",
       "3225036   432582  119225.0  415.0          34     64.00        NaN   \n",
       "3225037   432584  119225.0  415.0          17      8.70        NaN   \n",
       "3225038   432585  119225.0  440.0          17      8.80        NaN   \n",
       "3225039   432587  119225.0  440.0           5    157.00        NaN   \n",
       "\n",
       "               MEAN          STD  VALUENORM  TIME_CONTINUOUS  Time_order  \n",
       "0         -1.080147     4.302087   0.715966              0.0         0.0  \n",
       "1         24.763740     4.615921   0.051184              0.0         0.0  \n",
       "2          2.609944     2.035778  -0.790825              0.0         0.0  \n",
       "3         41.428204     9.528942  -0.779541              0.0         0.0  \n",
       "4          7.145331     0.648851   0.500375              0.0         0.0  \n",
       "5        188.338393   119.567732  -0.069738              0.0         0.0  \n",
       "6         11.006425     2.161977   0.413314              0.0         0.0  \n",
       "7        135.704576    93.624455   0.131327              0.0         0.0  \n",
       "8         -1.080147     4.302087   0.018630              2.3         1.0  \n",
       "9         24.763740     4.615921   0.051184              2.3         1.0  \n",
       "10         2.609944     2.035778  -0.594340              2.3         1.0  \n",
       "11        41.428204     9.528942   0.060006              2.3         1.0  \n",
       "12         7.145331     0.648851   0.330845              2.3         1.0  \n",
       "13       188.338393   119.567732   0.298254              2.3         1.0  \n",
       "14        14.333616     3.958092  -1.347522              3.6         2.0  \n",
       "15        23.683217     4.552615   0.508891              3.6         2.0  \n",
       "16       105.072327     6.554967   0.751746              3.6         2.0  \n",
       "17         1.350306     1.445883  -0.311440              3.6         2.0  \n",
       "18       135.704576    93.624455   0.366308              3.6         2.0  \n",
       "19         2.001926     0.519920   1.727330              3.6         2.0  \n",
       "20         4.151821     0.713661   0.067509              3.6         2.0  \n",
       "21       138.633262     5.323731   0.444564              3.6         2.0  \n",
       "22        24.828644    21.361991  -0.553724              3.6         2.0  \n",
       "23        32.246866     5.880561   0.196092              3.6         2.0  \n",
       "24        11.006425     2.161977   0.274552              3.6         2.0  \n",
       "25        30.393121     2.489034   0.444702              3.6         2.0  \n",
       "26        34.072032     1.555948   0.339322              3.6         2.0  \n",
       "27        89.271448     6.537488   0.264406              3.6         2.0  \n",
       "28       212.095378   110.493716   0.668858              3.6         2.0  \n",
       "29        14.662763     1.970897  -0.640705              3.6         2.0  \n",
       "...             ...          ...        ...              ...         ...  \n",
       "3225010    4.151821     0.713661  -0.633104             33.8         NaN  \n",
       "3225011  138.633262     5.323731   0.256726             33.8         NaN  \n",
       "3225012   24.828644    21.361991  -0.787784             33.8         NaN  \n",
       "3225013   32.246866     5.880561   1.777574             33.8         NaN  \n",
       "3225014   11.006425     2.161977   1.523409             33.8         NaN  \n",
       "3225015   30.393121     2.489034  -1.202523             33.8         NaN  \n",
       "3225016   34.072032     1.555948  -0.431912             33.8         NaN  \n",
       "3225017   89.271448     6.537488  -1.112269             33.8         NaN  \n",
       "3225018  212.095378   110.493716   0.487852             33.8         NaN  \n",
       "3225019   14.662763     1.970897  -0.437752             33.8         NaN  \n",
       "3225020    3.706550     0.715238   2.102027             33.8         NaN  \n",
       "3225021   11.987718     9.511521  -0.293089             33.8         NaN  \n",
       "3225022   39.867142    24.700711   1.616668             33.8         NaN  \n",
       "3225023   15.718341     7.154602  -0.002564             33.8         NaN  \n",
       "3225024   32.246866     5.880561  -1.062291             35.0         NaN  \n",
       "3225025  119.685420   151.288875  -0.308585             35.5         NaN  \n",
       "3225026  317.656061  1277.784289  -0.234512             35.5         NaN  \n",
       "3225027    2.100389     4.622310  -0.173158             35.5         NaN  \n",
       "3225028  267.646865  1045.432709  -0.239754             35.5         NaN  \n",
       "3225029   32.246866     5.880561   1.352445             36.5         NaN  \n",
       "3225030   32.246866     5.880561   1.250414             37.0         NaN  \n",
       "3225031   39.867142    24.700711   1.819092             40.1         NaN  \n",
       "3225032   -1.080147     4.302087  -1.376042             41.5         NaN  \n",
       "3225033   24.763740     4.615921  -1.465307             41.5         NaN  \n",
       "3225034   41.428204     9.528942  -1.094372             41.5         NaN  \n",
       "3225035    7.145331     0.648851   0.315433             41.5         NaN  \n",
       "3225036  188.338393   119.567732  -1.039899             41.5         NaN  \n",
       "3225037   11.006425     2.161977  -1.066813             41.5         NaN  \n",
       "3225038   11.006425     2.161977  -1.020559             44.0         NaN  \n",
       "3225039  135.704576    93.624455   0.227456             44.0         NaN  \n",
       "\n",
       "[3225040 rows x 11 columns]"
      ]
     },
     "execution_count": 233,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### END OF FILE ####### (below is testing stuff.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([    0,     1,     2, ..., 15297, 15298, 15299])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tags=pd.read_csv(outfile_path+\"LSTM_death_tags_train.csv\")\n",
    "tags[\"UNIQUE_ID\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([15300, 15301, 15302, ..., 19122, 19123, 19124])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tags=pd.read_csv(outfile_path+\"LSTM_death_tags_val.csv\")\n",
    "tags[\"UNIQUE_ID\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df=pd.read_csv(outfile_path+\"LSTM_tensor_train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([    0,     1,     2, ..., 15297, 15298, 15299])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"UNIQUE_ID\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15300"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"UNIQUE_ID\"].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2=pd.read_csv(outfile_path+\"LSTM_tensor_val.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([15300, 15301, 15302, ..., 19122, 19123, 19124])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2[\"UNIQUE_ID\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df3=pd.concat([df,df2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([    0,     1,     2, ..., 19122, 19123, 19124])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3[\"UNIQUE_ID\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/XXXX/miniconda3/envs/pytorch/lib/python3.6/site-packages/pandas/core/series.py:2890: FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls\n",
      "  infer_datetime_format=infer_datetime_format)\n"
     ]
    }
   ],
   "source": [
    "means_df=pd.Series.from_csv(\"~/Documents/Data/Full_MIMIC/Clean_data/mean_features.csv\")\n",
    "means_vec=torch.tensor(means_df.as_matrix())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([96])"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "means_vec.size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LABEL_CODE\n",
       "0      14.332368\n",
       "1      23.683532\n",
       "2       8.319696\n",
       "3     105.072921\n",
       "4       1.349956\n",
       "5     135.362454\n",
       "6       2.001855\n",
       "7       3.504950\n",
       "8       4.151537\n",
       "9     138.632943\n",
       "10    119.696930\n",
       "11    317.702850\n",
       "12      2.099668\n",
       "13     24.828327\n",
       "14      0.305753\n",
       "15      1.108482\n",
       "16     32.246985\n",
       "17     11.025442\n",
       "18     14.218029\n",
       "19     30.393274\n",
       "20     34.072047\n",
       "21     89.270204\n",
       "22      5.021936\n",
       "23     77.249455\n",
       "24    212.120463\n",
       "25     14.662603\n",
       "26      3.706547\n",
       "27     11.987865\n",
       "28     39.857678\n",
       "29     -1.064501\n",
       "         ...    \n",
       "66    108.225806\n",
       "67    211.900036\n",
       "68    135.809443\n",
       "69      3.275288\n",
       "70     70.765905\n",
       "71     40.142469\n",
       "72     11.517381\n",
       "73      2.691385\n",
       "74      5.582121\n",
       "75     73.378317\n",
       "76    315.733581\n",
       "77     20.315678\n",
       "78    605.229537\n",
       "79    235.878698\n",
       "80      1.307061\n",
       "81     59.813022\n",
       "82    574.522817\n",
       "83     42.854298\n",
       "84    108.222934\n",
       "85    196.146218\n",
       "86    174.020606\n",
       "87     30.696462\n",
       "88    819.275713\n",
       "89      1.757776\n",
       "90      0.491271\n",
       "91     92.211538\n",
       "92    187.327189\n",
       "93    516.350000\n",
       "94     98.466102\n",
       "95    307.347741\n",
       "Name: MEAN, Length: 96, dtype: float64"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mean_dims"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "cov=pd.read_csv(file_path+\"complete_covariates.csv\")\n",
    "df_train=pd.read_csv(file_path+\"complete_tensor_train1.csv\")\n",
    "df_val=pd.read_csv(file_path+\"complete_tensor_val1.csv\")\n",
    "deaths=pd.read_csv(file_path+\"complete_death_tags.csv\")\n",
    "df=pd.read_csv(file_path+\"complete_tensor.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a segmented dataset by patients for actual testing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_ids=cov[\"UNIQUE_ID\"]\n",
    "train_unique_ids,test_unique_ids=train_test_split(unique_ids,test_size=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_segment_train=df.loc[df[\"UNIQUE_ID\"].isin(list(train_unique_ids))]\n",
    "df_segment_test=df.loc[df[\"UNIQUE_ID\"].isin(list(test_unique_ids))]\n",
    "cov_segment_train=cov.loc[cov[\"UNIQUE_ID\"].isin(list(train_unique_ids))]\n",
    "cov_segment_test=cov.loc[cov[\"UNIQUE_ID\"].isin(list(test_unique_ids))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_segment_train.to_csv(file_path+\"segmented_tensor_train.csv\")\n",
    "df_segment_test.to_csv(file_path+\"segmented_tensor_test.csv\")\n",
    "cov_segment_train.to_csv(file_path+\"segmented_covariates_train.csv\")\n",
    "cov_segment_test.to_csv(file_path+\"segmented_covariates_test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0,\n",
       " 1,\n",
       " 2,\n",
       " 3,\n",
       " 4,\n",
       " 5,\n",
       " 6,\n",
       " 7,\n",
       " 8,\n",
       " 9,\n",
       " 10,\n",
       " 11,\n",
       " 12,\n",
       " 13,\n",
       " 14,\n",
       " 15,\n",
       " 16,\n",
       " 17,\n",
       " 18,\n",
       " 19,\n",
       " 20,\n",
       " 21,\n",
       " 22,\n",
       " 23,\n",
       " 24,\n",
       " 25,\n",
       " 26,\n",
       " 27,\n",
       " 28,\n",
       " 29,\n",
       " 30,\n",
       " 31,\n",
       " 32,\n",
       " 33,\n",
       " 34,\n",
       " 35,\n",
       " 36,\n",
       " 37,\n",
       " 38,\n",
       " 39,\n",
       " 40,\n",
       " 41,\n",
       " 42,\n",
       " 43,\n",
       " 44,\n",
       " 45,\n",
       " 46,\n",
       " 47,\n",
       " 48,\n",
       " 49,\n",
       " 50,\n",
       " 51,\n",
       " 52,\n",
       " 53,\n",
       " 54,\n",
       " 55,\n",
       " 56,\n",
       " 57,\n",
       " 58,\n",
       " 59,\n",
       " 60,\n",
       " 61,\n",
       " 62,\n",
       " 63,\n",
       " 64,\n",
       " 65,\n",
       " 66,\n",
       " 67,\n",
       " 68,\n",
       " 69,\n",
       " 70,\n",
       " 71,\n",
       " 72,\n",
       " 73,\n",
       " 74,\n",
       " 75,\n",
       " 76,\n",
       " 77,\n",
       " 78,\n",
       " 79,\n",
       " 80,\n",
       " 81,\n",
       " 82,\n",
       " 83,\n",
       " 84,\n",
       " 85,\n",
       " 86,\n",
       " 87,\n",
       " 88,\n",
       " 89,\n",
       " 90,\n",
       " 91,\n",
       " 92,\n",
       " 93,\n",
       " 94,\n",
       " 95,\n",
       " 96,\n",
       " 97,\n",
       " 98,\n",
       " 99,\n",
       " 100,\n",
       " 101,\n",
       " 102,\n",
       " 103,\n",
       " 104,\n",
       " 105,\n",
       " 106,\n",
       " 107,\n",
       " 108,\n",
       " 109,\n",
       " 110,\n",
       " 111,\n",
       " 112,\n",
       " 113,\n",
       " 114,\n",
       " 115,\n",
       " 116,\n",
       " 117,\n",
       " 118,\n",
       " 119,\n",
       " 120,\n",
       " 121,\n",
       " 122,\n",
       " 123,\n",
       " 124,\n",
       " 125,\n",
       " 126,\n",
       " 127,\n",
       " 128,\n",
       " 129,\n",
       " 130,\n",
       " 131,\n",
       " 132,\n",
       " 133,\n",
       " 134,\n",
       " 135,\n",
       " 136,\n",
       " 137,\n",
       " 138,\n",
       " 139,\n",
       " 140,\n",
       " 141,\n",
       " 142,\n",
       " 143,\n",
       " 144,\n",
       " 145,\n",
       " 146,\n",
       " 147,\n",
       " 148,\n",
       " 149,\n",
       " 150,\n",
       " 151,\n",
       " 152,\n",
       " 153,\n",
       " 154,\n",
       " 155,\n",
       " 156,\n",
       " 157,\n",
       " 158,\n",
       " 159,\n",
       " 160,\n",
       " 161,\n",
       " 162,\n",
       " 163,\n",
       " 164,\n",
       " 165,\n",
       " 166,\n",
       " 167,\n",
       " 168,\n",
       " 169,\n",
       " 170,\n",
       " 171,\n",
       " 172,\n",
       " 173,\n",
       " 174,\n",
       " 175,\n",
       " 176,\n",
       " 177,\n",
       " 178,\n",
       " 179,\n",
       " 180,\n",
       " 181,\n",
       " 182,\n",
       " 183,\n",
       " 184,\n",
       " 185,\n",
       " 186,\n",
       " 187,\n",
       " 188,\n",
       " 189,\n",
       " 190,\n",
       " 191,\n",
       " 192,\n",
       " 193,\n",
       " 194,\n",
       " 195,\n",
       " 196,\n",
       " 197,\n",
       " 198,\n",
       " 199,\n",
       " 200,\n",
       " 201,\n",
       " 202,\n",
       " 203,\n",
       " 204,\n",
       " 205,\n",
       " 206,\n",
       " 207,\n",
       " 208,\n",
       " 209,\n",
       " 210,\n",
       " 211,\n",
       " 212,\n",
       " 213,\n",
       " 214,\n",
       " 215,\n",
       " 216,\n",
       " 217,\n",
       " 218,\n",
       " 219,\n",
       " 220,\n",
       " 221,\n",
       " 222,\n",
       " 223,\n",
       " 224,\n",
       " 225,\n",
       " 226,\n",
       " 227,\n",
       " 228,\n",
       " 229,\n",
       " 230,\n",
       " 231,\n",
       " 232,\n",
       " 233,\n",
       " 234,\n",
       " 235,\n",
       " 236,\n",
       " 237,\n",
       " 238,\n",
       " 239,\n",
       " 240,\n",
       " 241,\n",
       " 242,\n",
       " 243,\n",
       " 244,\n",
       " 245,\n",
       " 246,\n",
       " 247,\n",
       " 248,\n",
       " 249,\n",
       " 250,\n",
       " 251,\n",
       " 252,\n",
       " 253,\n",
       " 254,\n",
       " 255,\n",
       " 256,\n",
       " 257,\n",
       " 258,\n",
       " 259,\n",
       " 260,\n",
       " 261,\n",
       " 262,\n",
       " 263,\n",
       " 264,\n",
       " 265,\n",
       " 266,\n",
       " 267,\n",
       " 268,\n",
       " 269,\n",
       " 270,\n",
       " 271,\n",
       " 272,\n",
       " 273,\n",
       " 274,\n",
       " 275,\n",
       " 276,\n",
       " 277,\n",
       " 278,\n",
       " 279,\n",
       " 280,\n",
       " 281,\n",
       " 282,\n",
       " 283,\n",
       " 284,\n",
       " 285,\n",
       " 286,\n",
       " 287,\n",
       " 288,\n",
       " 289,\n",
       " 290,\n",
       " 291,\n",
       " 292,\n",
       " 293,\n",
       " 294,\n",
       " 295,\n",
       " 296,\n",
       " 297,\n",
       " 298,\n",
       " 299,\n",
       " 300,\n",
       " 301,\n",
       " 302,\n",
       " 303,\n",
       " 304,\n",
       " 305,\n",
       " 306,\n",
       " 307,\n",
       " 308,\n",
       " 309,\n",
       " 310,\n",
       " 311,\n",
       " 312,\n",
       " 313,\n",
       " 314,\n",
       " 315,\n",
       " 316,\n",
       " 317,\n",
       " 318,\n",
       " 319,\n",
       " 320,\n",
       " 321,\n",
       " 322,\n",
       " 323,\n",
       " 324,\n",
       " 325,\n",
       " 326,\n",
       " 327,\n",
       " 328,\n",
       " 329,\n",
       " 330,\n",
       " 331,\n",
       " 332,\n",
       " 333,\n",
       " 334,\n",
       " 335,\n",
       " 336,\n",
       " 337,\n",
       " 338,\n",
       " 339,\n",
       " 340,\n",
       " 341,\n",
       " 342,\n",
       " 343,\n",
       " 344,\n",
       " 345,\n",
       " 346,\n",
       " 347,\n",
       " 348,\n",
       " 349,\n",
       " 350,\n",
       " 351,\n",
       " 352,\n",
       " 353,\n",
       " 354,\n",
       " 355,\n",
       " 356,\n",
       " 357,\n",
       " 358,\n",
       " 359,\n",
       " 360,\n",
       " 361,\n",
       " 362,\n",
       " 363,\n",
       " 364,\n",
       " 365,\n",
       " 366,\n",
       " 367,\n",
       " 368,\n",
       " 369,\n",
       " 370,\n",
       " 371,\n",
       " 372,\n",
       " 373,\n",
       " 374,\n",
       " 375,\n",
       " 376,\n",
       " 377,\n",
       " 378,\n",
       " 379,\n",
       " 380,\n",
       " 381,\n",
       " 382,\n",
       " 383,\n",
       " 384,\n",
       " 385,\n",
       " 386,\n",
       " 387,\n",
       " 388,\n",
       " 389,\n",
       " 390,\n",
       " 391,\n",
       " 392,\n",
       " 393,\n",
       " 394,\n",
       " 395,\n",
       " 396,\n",
       " 397,\n",
       " 398,\n",
       " 399,\n",
       " 400,\n",
       " 401,\n",
       " 402,\n",
       " 403,\n",
       " 404,\n",
       " 405,\n",
       " 406,\n",
       " 407,\n",
       " 408,\n",
       " 409,\n",
       " 410,\n",
       " 411,\n",
       " 412,\n",
       " 413,\n",
       " 414,\n",
       " 415,\n",
       " 416,\n",
       " 417,\n",
       " 418,\n",
       " 419,\n",
       " 420,\n",
       " 421,\n",
       " 422,\n",
       " 423,\n",
       " 424,\n",
       " 425,\n",
       " 426,\n",
       " 427,\n",
       " 428,\n",
       " 429,\n",
       " 430,\n",
       " 431,\n",
       " 432,\n",
       " 433,\n",
       " 434,\n",
       " 435,\n",
       " 436,\n",
       " 437,\n",
       " 438,\n",
       " 439,\n",
       " 440,\n",
       " 441,\n",
       " 442,\n",
       " 443,\n",
       " 444,\n",
       " 445,\n",
       " 446,\n",
       " 447,\n",
       " 448,\n",
       " 449,\n",
       " 450,\n",
       " 451,\n",
       " 452,\n",
       " 453,\n",
       " 454,\n",
       " 455,\n",
       " 456,\n",
       " 457,\n",
       " 458,\n",
       " 459,\n",
       " 460,\n",
       " 461,\n",
       " 462,\n",
       " 463,\n",
       " 464,\n",
       " 465,\n",
       " 466,\n",
       " 467,\n",
       " 468,\n",
       " 469,\n",
       " 470,\n",
       " 471,\n",
       " 472,\n",
       " 473,\n",
       " 474,\n",
       " 475,\n",
       " 476,\n",
       " 477,\n",
       " 478,\n",
       " 479,\n",
       " 480,\n",
       " 481,\n",
       " 482,\n",
       " 483,\n",
       " 484,\n",
       " 485,\n",
       " 486,\n",
       " 487,\n",
       " 488,\n",
       " 489,\n",
       " 490,\n",
       " 491,\n",
       " 492,\n",
       " 493,\n",
       " 494,\n",
       " 495,\n",
       " 496,\n",
       " 497,\n",
       " 498,\n",
       " 499,\n",
       " 500,\n",
       " 501,\n",
       " 502,\n",
       " 503,\n",
       " 504,\n",
       " 505,\n",
       " 506,\n",
       " 507,\n",
       " 508,\n",
       " 509,\n",
       " 510,\n",
       " 511,\n",
       " 512,\n",
       " 513,\n",
       " 514,\n",
       " 515,\n",
       " 516,\n",
       " 517,\n",
       " 518,\n",
       " 519,\n",
       " 520,\n",
       " 521,\n",
       " 522,\n",
       " 523,\n",
       " 524,\n",
       " 525,\n",
       " 526,\n",
       " 527,\n",
       " 528,\n",
       " 529,\n",
       " 530,\n",
       " 531,\n",
       " 532,\n",
       " 533,\n",
       " 534,\n",
       " 535,\n",
       " 536,\n",
       " 537,\n",
       " 538,\n",
       " 539,\n",
       " 540,\n",
       " 541,\n",
       " 542,\n",
       " 543,\n",
       " 544,\n",
       " 545,\n",
       " 546,\n",
       " 547,\n",
       " 548,\n",
       " 549,\n",
       " 550,\n",
       " 551,\n",
       " 552,\n",
       " 553,\n",
       " 554,\n",
       " 555,\n",
       " 556,\n",
       " 557,\n",
       " 558,\n",
       " 559,\n",
       " 560,\n",
       " 561,\n",
       " 562,\n",
       " 563,\n",
       " 564,\n",
       " 565,\n",
       " 566,\n",
       " 567,\n",
       " 568,\n",
       " 569,\n",
       " 570,\n",
       " 571,\n",
       " 572,\n",
       " 573,\n",
       " 574,\n",
       " 575,\n",
       " 576,\n",
       " 577,\n",
       " 578,\n",
       " 579,\n",
       " 580,\n",
       " 581,\n",
       " 582,\n",
       " 583,\n",
       " 584,\n",
       " 585,\n",
       " 586,\n",
       " 587,\n",
       " 588,\n",
       " 589,\n",
       " 590,\n",
       " 591,\n",
       " 592,\n",
       " 593,\n",
       " 594,\n",
       " 595,\n",
       " 596,\n",
       " 597,\n",
       " 598,\n",
       " 599,\n",
       " 600,\n",
       " 601,\n",
       " 602,\n",
       " 603,\n",
       " 604,\n",
       " 605,\n",
       " 606,\n",
       " 607,\n",
       " 608,\n",
       " 609,\n",
       " 610,\n",
       " 611,\n",
       " 612,\n",
       " 613,\n",
       " 614,\n",
       " 615,\n",
       " 616,\n",
       " 617,\n",
       " 618,\n",
       " 619,\n",
       " 620,\n",
       " 621,\n",
       " 622,\n",
       " 623,\n",
       " 624,\n",
       " 625,\n",
       " 626,\n",
       " 627,\n",
       " 628,\n",
       " 629,\n",
       " 630,\n",
       " 631,\n",
       " 632,\n",
       " 633,\n",
       " 634,\n",
       " 635,\n",
       " 636,\n",
       " 637,\n",
       " 638,\n",
       " 639,\n",
       " 640,\n",
       " 641,\n",
       " 642,\n",
       " 643,\n",
       " 644,\n",
       " 645,\n",
       " 646,\n",
       " 647,\n",
       " 648,\n",
       " 649,\n",
       " 650,\n",
       " 651,\n",
       " 652,\n",
       " 653,\n",
       " 654,\n",
       " 655,\n",
       " 656,\n",
       " 657,\n",
       " 658,\n",
       " 659,\n",
       " 660,\n",
       " 661,\n",
       " 662,\n",
       " 663,\n",
       " 664,\n",
       " 665,\n",
       " 666,\n",
       " 667,\n",
       " 668,\n",
       " 669,\n",
       " 670,\n",
       " 671,\n",
       " 672,\n",
       " 673,\n",
       " 674,\n",
       " 675,\n",
       " 676,\n",
       " 677,\n",
       " 678,\n",
       " 679,\n",
       " 680,\n",
       " 681,\n",
       " 682,\n",
       " 683,\n",
       " 684,\n",
       " 685,\n",
       " 686,\n",
       " 687,\n",
       " 688,\n",
       " 689,\n",
       " 690,\n",
       " 691,\n",
       " 692,\n",
       " 693,\n",
       " 694,\n",
       " 695,\n",
       " 696,\n",
       " 697,\n",
       " 698,\n",
       " 699,\n",
       " 700,\n",
       " 701,\n",
       " 702,\n",
       " 703,\n",
       " 704,\n",
       " 705,\n",
       " 706,\n",
       " 707,\n",
       " 708,\n",
       " 709,\n",
       " 710,\n",
       " 711,\n",
       " 712,\n",
       " 713,\n",
       " 714,\n",
       " 715,\n",
       " 716,\n",
       " 717,\n",
       " 718,\n",
       " 719,\n",
       " 720,\n",
       " 721,\n",
       " 722,\n",
       " 723,\n",
       " 724,\n",
       " 725,\n",
       " 726,\n",
       " 727,\n",
       " 728,\n",
       " 729,\n",
       " 730,\n",
       " 731,\n",
       " 732,\n",
       " 733,\n",
       " 734,\n",
       " 735,\n",
       " 736,\n",
       " 737,\n",
       " 738,\n",
       " 739,\n",
       " 740,\n",
       " 741,\n",
       " 742,\n",
       " 743,\n",
       " 744,\n",
       " 745,\n",
       " 746,\n",
       " 747,\n",
       " 748,\n",
       " 749,\n",
       " 750,\n",
       " 751,\n",
       " 752,\n",
       " 753,\n",
       " 754,\n",
       " 755,\n",
       " 756,\n",
       " 757,\n",
       " 758,\n",
       " 759,\n",
       " 760,\n",
       " 761,\n",
       " 762,\n",
       " 763,\n",
       " 764,\n",
       " 765,\n",
       " 766,\n",
       " 767,\n",
       " 768,\n",
       " 769,\n",
       " 770,\n",
       " 771,\n",
       " 772,\n",
       " 773,\n",
       " 774,\n",
       " 775,\n",
       " 776,\n",
       " 777,\n",
       " 778,\n",
       " 779,\n",
       " 780,\n",
       " 781,\n",
       " 782,\n",
       " 783,\n",
       " 784,\n",
       " 785,\n",
       " 786,\n",
       " 787,\n",
       " 788,\n",
       " 789,\n",
       " 790,\n",
       " 791,\n",
       " 792,\n",
       " 793,\n",
       " 794,\n",
       " 795,\n",
       " 796,\n",
       " 797,\n",
       " 798,\n",
       " 799,\n",
       " 800,\n",
       " 801,\n",
       " 802,\n",
       " 803,\n",
       " 804,\n",
       " 805,\n",
       " 806,\n",
       " 807,\n",
       " 808,\n",
       " 809,\n",
       " 810,\n",
       " 811,\n",
       " 812,\n",
       " 813,\n",
       " 814,\n",
       " 815,\n",
       " 816,\n",
       " 817,\n",
       " 818,\n",
       " 819,\n",
       " 820,\n",
       " 821,\n",
       " 822,\n",
       " 823,\n",
       " 824,\n",
       " 825,\n",
       " 826,\n",
       " 827,\n",
       " 828,\n",
       " 829,\n",
       " 830,\n",
       " 831,\n",
       " 832,\n",
       " 833,\n",
       " 834,\n",
       " 835,\n",
       " 836,\n",
       " 837,\n",
       " 838,\n",
       " 839,\n",
       " 840,\n",
       " 841,\n",
       " 842,\n",
       " 843,\n",
       " 844,\n",
       " 845,\n",
       " 846,\n",
       " 847,\n",
       " 848,\n",
       " 849,\n",
       " 850,\n",
       " 851,\n",
       " 852,\n",
       " 853,\n",
       " 854,\n",
       " 855,\n",
       " 856,\n",
       " 857,\n",
       " 858,\n",
       " 859,\n",
       " 860,\n",
       " 861,\n",
       " 862,\n",
       " 863,\n",
       " 864,\n",
       " 865,\n",
       " 866,\n",
       " 867,\n",
       " 868,\n",
       " 869,\n",
       " 870,\n",
       " 871,\n",
       " 872,\n",
       " 873,\n",
       " 874,\n",
       " 875,\n",
       " 876,\n",
       " 877,\n",
       " 878,\n",
       " 879,\n",
       " 880,\n",
       " 881,\n",
       " 882,\n",
       " 883,\n",
       " 884,\n",
       " 885,\n",
       " 886,\n",
       " 887,\n",
       " 888,\n",
       " 889,\n",
       " 890,\n",
       " 891,\n",
       " 892,\n",
       " 893,\n",
       " 894,\n",
       " 895,\n",
       " 896,\n",
       " 897,\n",
       " 898,\n",
       " 899,\n",
       " 900,\n",
       " 901,\n",
       " 902,\n",
       " 903,\n",
       " 904,\n",
       " 905,\n",
       " 906,\n",
       " 907,\n",
       " 908,\n",
       " 909,\n",
       " 910,\n",
       " 911,\n",
       " 912,\n",
       " 913,\n",
       " 914,\n",
       " 915,\n",
       " 916,\n",
       " 917,\n",
       " 918,\n",
       " 919,\n",
       " 920,\n",
       " 921,\n",
       " 922,\n",
       " 923,\n",
       " 924,\n",
       " 925,\n",
       " 926,\n",
       " 927,\n",
       " 928,\n",
       " 929,\n",
       " 930,\n",
       " 931,\n",
       " 932,\n",
       " 933,\n",
       " 934,\n",
       " 935,\n",
       " 936,\n",
       " 937,\n",
       " 938,\n",
       " 939,\n",
       " 940,\n",
       " 941,\n",
       " 942,\n",
       " 943,\n",
       " 944,\n",
       " 945,\n",
       " 946,\n",
       " 947,\n",
       " 948,\n",
       " 949,\n",
       " 950,\n",
       " 951,\n",
       " 952,\n",
       " 953,\n",
       " 954,\n",
       " 955,\n",
       " 956,\n",
       " 957,\n",
       " 958,\n",
       " 959,\n",
       " 960,\n",
       " 961,\n",
       " 962,\n",
       " 963,\n",
       " 964,\n",
       " 965,\n",
       " 966,\n",
       " 967,\n",
       " 968,\n",
       " 969,\n",
       " 970,\n",
       " 971,\n",
       " 972,\n",
       " 973,\n",
       " 974,\n",
       " 975,\n",
       " 976,\n",
       " 977,\n",
       " 978,\n",
       " 979,\n",
       " 980,\n",
       " 981,\n",
       " 982,\n",
       " 983,\n",
       " 984,\n",
       " 985,\n",
       " 986,\n",
       " 987,\n",
       " 988,\n",
       " 989,\n",
       " 990,\n",
       " 991,\n",
       " 992,\n",
       " 993,\n",
       " 994,\n",
       " 995,\n",
       " 996,\n",
       " 997,\n",
       " 998,\n",
       " 999,\n",
       " ...]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(df[\"UNIQUE_ID\"].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23341"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train['UNIQUE_ID'].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23336"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_val['UNIQUE_ID'].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23341"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deaths[\"UNIQUE_ID\"].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23341"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"UNIQUE_ID\"].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=torch.tensor([3,2,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "b=a.repeat(10,5,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([10, 5, 3])"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.cumsum(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=torch.tensor([np.nan, 3, 4, np.nan])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "observed_mask=a==a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 0,  1,  1,  0], dtype=torch.uint8)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "observed_mask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=torch.randn((3,4,5))\n",
    "b=torch.randn((3,4,5))\n",
    "c=torch.randn((3,4,5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([9, 4, 5])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d=torch.cat((a,b,c))\n",
    "d.size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.float32\n"
     ]
    }
   ],
   "source": [
    "print(d.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "z=torch.zeros((4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([nan.,   0.,   0.,   3.])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.zeros((4)).masked_scatter_(1-observed_mask,a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([nan.,   3.,   4., nan.])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.float()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.float32"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "b=a.repeat(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([nan.,   3.,   4., nan., nan.,   3.,   4., nan., nan.,   3.,\n",
       "          4., nan.])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([nan.,   3.,   4., nan., nan.,   3.,   4., nan., nan.,   3.,\n",
       "          4., nan.])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "device=torch.device(\"cpu\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.float32"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([1, 0, 2]), array([3])]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_test_split(np.arange(4),test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=3\n",
    "#a=None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a is not None "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df=pd.read_csv(outfile_path+\"complete_tensor.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3082224"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>UNIQUE_ID</th>\n",
       "      <th>LABEL_CODE</th>\n",
       "      <th>TIME_STAMP</th>\n",
       "      <th>VALUENUM</th>\n",
       "      <th>MEAN</th>\n",
       "      <th>STD</th>\n",
       "      <th>VALUENORM</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>13227</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>14.332368</td>\n",
       "      <td>3.957156</td>\n",
       "      <td>0.168715</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>13227</td>\n",
       "      <td>0</td>\n",
       "      <td>26</td>\n",
       "      <td>12.0</td>\n",
       "      <td>14.332368</td>\n",
       "      <td>3.957156</td>\n",
       "      <td>-0.589405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>13227</td>\n",
       "      <td>0</td>\n",
       "      <td>59</td>\n",
       "      <td>10.0</td>\n",
       "      <td>14.332368</td>\n",
       "      <td>3.957156</td>\n",
       "      <td>-1.094819</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>13227</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>23.683532</td>\n",
       "      <td>4.551461</td>\n",
       "      <td>0.289241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>13227</td>\n",
       "      <td>1</td>\n",
       "      <td>26</td>\n",
       "      <td>25.0</td>\n",
       "      <td>23.683532</td>\n",
       "      <td>4.551461</td>\n",
       "      <td>0.289241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>13227</td>\n",
       "      <td>1</td>\n",
       "      <td>59</td>\n",
       "      <td>27.0</td>\n",
       "      <td>23.683532</td>\n",
       "      <td>4.551461</td>\n",
       "      <td>0.728660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>13227</td>\n",
       "      <td>2</td>\n",
       "      <td>26</td>\n",
       "      <td>8.9</td>\n",
       "      <td>8.319696</td>\n",
       "      <td>0.907730</td>\n",
       "      <td>0.639292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>13227</td>\n",
       "      <td>2</td>\n",
       "      <td>59</td>\n",
       "      <td>8.5</td>\n",
       "      <td>8.319696</td>\n",
       "      <td>0.907730</td>\n",
       "      <td>0.198632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>13227</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>103.0</td>\n",
       "      <td>105.072921</td>\n",
       "      <td>6.552307</td>\n",
       "      <td>-0.316365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>13227</td>\n",
       "      <td>3</td>\n",
       "      <td>26</td>\n",
       "      <td>107.0</td>\n",
       "      <td>105.072921</td>\n",
       "      <td>6.552307</td>\n",
       "      <td>0.294107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>13227</td>\n",
       "      <td>3</td>\n",
       "      <td>59</td>\n",
       "      <td>106.0</td>\n",
       "      <td>105.072921</td>\n",
       "      <td>6.552307</td>\n",
       "      <td>0.141489</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>13227</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0.8</td>\n",
       "      <td>1.349956</td>\n",
       "      <td>1.445214</td>\n",
       "      <td>-0.380536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>13227</td>\n",
       "      <td>4</td>\n",
       "      <td>26</td>\n",
       "      <td>0.6</td>\n",
       "      <td>1.349956</td>\n",
       "      <td>1.445214</td>\n",
       "      <td>-0.518924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>13227</td>\n",
       "      <td>4</td>\n",
       "      <td>59</td>\n",
       "      <td>0.7</td>\n",
       "      <td>1.349956</td>\n",
       "      <td>1.445214</td>\n",
       "      <td>-0.449730</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>13227</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>116.0</td>\n",
       "      <td>135.362454</td>\n",
       "      <td>91.375215</td>\n",
       "      <td>-0.211901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>13227</td>\n",
       "      <td>5</td>\n",
       "      <td>26</td>\n",
       "      <td>115.0</td>\n",
       "      <td>135.362454</td>\n",
       "      <td>91.375215</td>\n",
       "      <td>-0.222844</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>13227</td>\n",
       "      <td>5</td>\n",
       "      <td>59</td>\n",
       "      <td>137.0</td>\n",
       "      <td>135.362454</td>\n",
       "      <td>91.375215</td>\n",
       "      <td>0.017921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>13227</td>\n",
       "      <td>6</td>\n",
       "      <td>26</td>\n",
       "      <td>1.6</td>\n",
       "      <td>2.001855</td>\n",
       "      <td>0.519634</td>\n",
       "      <td>-0.773342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>13227</td>\n",
       "      <td>6</td>\n",
       "      <td>59</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.001855</td>\n",
       "      <td>0.519634</td>\n",
       "      <td>1.151088</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>13227</td>\n",
       "      <td>7</td>\n",
       "      <td>26</td>\n",
       "      <td>4.2</td>\n",
       "      <td>3.504950</td>\n",
       "      <td>1.392238</td>\n",
       "      <td>0.499232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>20</td>\n",
       "      <td>13227</td>\n",
       "      <td>7</td>\n",
       "      <td>59</td>\n",
       "      <td>3.6</td>\n",
       "      <td>3.504950</td>\n",
       "      <td>1.392238</td>\n",
       "      <td>0.068271</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>21</td>\n",
       "      <td>13227</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>3.8</td>\n",
       "      <td>4.151537</td>\n",
       "      <td>0.712880</td>\n",
       "      <td>-0.493123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>22</td>\n",
       "      <td>13227</td>\n",
       "      <td>8</td>\n",
       "      <td>26</td>\n",
       "      <td>4.1</td>\n",
       "      <td>4.151537</td>\n",
       "      <td>0.712880</td>\n",
       "      <td>-0.072294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>23</td>\n",
       "      <td>13227</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>119.696930</td>\n",
       "      <td>151.316366</td>\n",
       "      <td>-0.202866</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>24</td>\n",
       "      <td>13227</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>317.702850</td>\n",
       "      <td>1277.969093</td>\n",
       "      <td>-0.231385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>25</td>\n",
       "      <td>13227</td>\n",
       "      <td>12</td>\n",
       "      <td>0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>2.099668</td>\n",
       "      <td>4.620891</td>\n",
       "      <td>-0.389463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>26</td>\n",
       "      <td>13227</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>24.828327</td>\n",
       "      <td>21.364763</td>\n",
       "      <td>-0.413219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>27</td>\n",
       "      <td>13227</td>\n",
       "      <td>13</td>\n",
       "      <td>26</td>\n",
       "      <td>12.0</td>\n",
       "      <td>24.828327</td>\n",
       "      <td>21.364763</td>\n",
       "      <td>-0.600443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>28</td>\n",
       "      <td>13227</td>\n",
       "      <td>13</td>\n",
       "      <td>59</td>\n",
       "      <td>14.0</td>\n",
       "      <td>24.828327</td>\n",
       "      <td>21.364763</td>\n",
       "      <td>-0.506831</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>29</td>\n",
       "      <td>13227</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.305753</td>\n",
       "      <td>0.525673</td>\n",
       "      <td>2.081613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082194</th>\n",
       "      <td>3082194</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>38</td>\n",
       "      <td>50.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.529278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082195</th>\n",
       "      <td>3082195</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>40</td>\n",
       "      <td>110.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.039997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082196</th>\n",
       "      <td>3082196</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>42</td>\n",
       "      <td>32.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.676063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082197</th>\n",
       "      <td>3082197</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>44</td>\n",
       "      <td>40.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.610825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082198</th>\n",
       "      <td>3082198</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>46</td>\n",
       "      <td>40.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.610825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082199</th>\n",
       "      <td>3082199</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>48</td>\n",
       "      <td>80.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.284638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082200</th>\n",
       "      <td>3082200</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.284638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082201</th>\n",
       "      <td>3082201</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>52</td>\n",
       "      <td>70.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.366185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082202</th>\n",
       "      <td>3082202</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>54</td>\n",
       "      <td>30.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.692372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082203</th>\n",
       "      <td>3082203</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>56</td>\n",
       "      <td>30.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.692372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082204</th>\n",
       "      <td>3082204</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>58</td>\n",
       "      <td>80.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.284638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082205</th>\n",
       "      <td>3082205</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>60</td>\n",
       "      <td>50.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.529278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082206</th>\n",
       "      <td>3082206</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>62</td>\n",
       "      <td>70.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.366185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082207</th>\n",
       "      <td>3082207</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>64</td>\n",
       "      <td>42.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.594516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082208</th>\n",
       "      <td>3082208</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>66</td>\n",
       "      <td>100.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.121544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082209</th>\n",
       "      <td>3082209</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>68</td>\n",
       "      <td>80.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.284638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082210</th>\n",
       "      <td>3082210</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>70</td>\n",
       "      <td>60.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.447731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082211</th>\n",
       "      <td>3082211</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>72</td>\n",
       "      <td>60.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.447731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082212</th>\n",
       "      <td>3082212</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>74</td>\n",
       "      <td>110.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.039997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082213</th>\n",
       "      <td>3082213</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>76</td>\n",
       "      <td>80.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.284638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082214</th>\n",
       "      <td>3082214</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>78</td>\n",
       "      <td>80.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.284638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082215</th>\n",
       "      <td>3082215</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>80</td>\n",
       "      <td>100.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.121544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082216</th>\n",
       "      <td>3082216</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>82</td>\n",
       "      <td>60.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.447731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082217</th>\n",
       "      <td>3082217</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>84</td>\n",
       "      <td>40.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.610825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082218</th>\n",
       "      <td>3082218</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>86</td>\n",
       "      <td>50.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.529278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082219</th>\n",
       "      <td>3082219</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>88</td>\n",
       "      <td>43.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.586361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082220</th>\n",
       "      <td>3082220</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>90</td>\n",
       "      <td>40.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.610825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082221</th>\n",
       "      <td>3082221</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>92</td>\n",
       "      <td>40.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.610825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082222</th>\n",
       "      <td>3082222</td>\n",
       "      <td>1356</td>\n",
       "      <td>62</td>\n",
       "      <td>94</td>\n",
       "      <td>55.0</td>\n",
       "      <td>114.904780</td>\n",
       "      <td>122.628835</td>\n",
       "      <td>-0.488505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3082223</th>\n",
       "      <td>3082223</td>\n",
       "      <td>1356</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.159712</td>\n",
       "      <td>3.337186</td>\n",
       "      <td>-0.647165</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3082224 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Unnamed: 0  UNIQUE_ID  LABEL_CODE  TIME_STAMP  VALUENUM        MEAN  \\\n",
       "0                 0      13227           0           0      15.0   14.332368   \n",
       "1                 1      13227           0          26      12.0   14.332368   \n",
       "2                 2      13227           0          59      10.0   14.332368   \n",
       "3                 3      13227           1           0      25.0   23.683532   \n",
       "4                 4      13227           1          26      25.0   23.683532   \n",
       "5                 5      13227           1          59      27.0   23.683532   \n",
       "6                 6      13227           2          26       8.9    8.319696   \n",
       "7                 7      13227           2          59       8.5    8.319696   \n",
       "8                 8      13227           3           0     103.0  105.072921   \n",
       "9                 9      13227           3          26     107.0  105.072921   \n",
       "10               10      13227           3          59     106.0  105.072921   \n",
       "11               11      13227           4           0       0.8    1.349956   \n",
       "12               12      13227           4          26       0.6    1.349956   \n",
       "13               13      13227           4          59       0.7    1.349956   \n",
       "14               14      13227           5           0     116.0  135.362454   \n",
       "15               15      13227           5          26     115.0  135.362454   \n",
       "16               16      13227           5          59     137.0  135.362454   \n",
       "17               17      13227           6          26       1.6    2.001855   \n",
       "18               18      13227           6          59       2.6    2.001855   \n",
       "19               19      13227           7          26       4.2    3.504950   \n",
       "20               20      13227           7          59       3.6    3.504950   \n",
       "21               21      13227           8           0       3.8    4.151537   \n",
       "22               22      13227           8          26       4.1    4.151537   \n",
       "23               23      13227          10           0      89.0  119.696930   \n",
       "24               24      13227          11           0      22.0  317.702850   \n",
       "25               25      13227          12           0       0.3    2.099668   \n",
       "26               26      13227          13           0      16.0   24.828327   \n",
       "27               27      13227          13          26      12.0   24.828327   \n",
       "28               28      13227          13          59      14.0   24.828327   \n",
       "29               29      13227          14           0       1.4    0.305753   \n",
       "...             ...        ...         ...         ...       ...         ...   \n",
       "3082194     3082194       1356          62          38      50.0  114.904780   \n",
       "3082195     3082195       1356          62          40     110.0  114.904780   \n",
       "3082196     3082196       1356          62          42      32.0  114.904780   \n",
       "3082197     3082197       1356          62          44      40.0  114.904780   \n",
       "3082198     3082198       1356          62          46      40.0  114.904780   \n",
       "3082199     3082199       1356          62          48      80.0  114.904780   \n",
       "3082200     3082200       1356          62          50      80.0  114.904780   \n",
       "3082201     3082201       1356          62          52      70.0  114.904780   \n",
       "3082202     3082202       1356          62          54      30.0  114.904780   \n",
       "3082203     3082203       1356          62          56      30.0  114.904780   \n",
       "3082204     3082204       1356          62          58      80.0  114.904780   \n",
       "3082205     3082205       1356          62          60      50.0  114.904780   \n",
       "3082206     3082206       1356          62          62      70.0  114.904780   \n",
       "3082207     3082207       1356          62          64      42.0  114.904780   \n",
       "3082208     3082208       1356          62          66     100.0  114.904780   \n",
       "3082209     3082209       1356          62          68      80.0  114.904780   \n",
       "3082210     3082210       1356          62          70      60.0  114.904780   \n",
       "3082211     3082211       1356          62          72      60.0  114.904780   \n",
       "3082212     3082212       1356          62          74     110.0  114.904780   \n",
       "3082213     3082213       1356          62          76      80.0  114.904780   \n",
       "3082214     3082214       1356          62          78      80.0  114.904780   \n",
       "3082215     3082215       1356          62          80     100.0  114.904780   \n",
       "3082216     3082216       1356          62          82      60.0  114.904780   \n",
       "3082217     3082217       1356          62          84      40.0  114.904780   \n",
       "3082218     3082218       1356          62          86      50.0  114.904780   \n",
       "3082219     3082219       1356          62          88      43.0  114.904780   \n",
       "3082220     3082220       1356          62          90      40.0  114.904780   \n",
       "3082221     3082221       1356          62          92      40.0  114.904780   \n",
       "3082222     3082222       1356          62          94      55.0  114.904780   \n",
       "3082223     3082223       1356          40           0       2.0    4.159712   \n",
       "\n",
       "                 STD  VALUENORM  \n",
       "0           3.957156   0.168715  \n",
       "1           3.957156  -0.589405  \n",
       "2           3.957156  -1.094819  \n",
       "3           4.551461   0.289241  \n",
       "4           4.551461   0.289241  \n",
       "5           4.551461   0.728660  \n",
       "6           0.907730   0.639292  \n",
       "7           0.907730   0.198632  \n",
       "8           6.552307  -0.316365  \n",
       "9           6.552307   0.294107  \n",
       "10          6.552307   0.141489  \n",
       "11          1.445214  -0.380536  \n",
       "12          1.445214  -0.518924  \n",
       "13          1.445214  -0.449730  \n",
       "14         91.375215  -0.211901  \n",
       "15         91.375215  -0.222844  \n",
       "16         91.375215   0.017921  \n",
       "17          0.519634  -0.773342  \n",
       "18          0.519634   1.151088  \n",
       "19          1.392238   0.499232  \n",
       "20          1.392238   0.068271  \n",
       "21          0.712880  -0.493123  \n",
       "22          0.712880  -0.072294  \n",
       "23        151.316366  -0.202866  \n",
       "24       1277.969093  -0.231385  \n",
       "25          4.620891  -0.389463  \n",
       "26         21.364763  -0.413219  \n",
       "27         21.364763  -0.600443  \n",
       "28         21.364763  -0.506831  \n",
       "29          0.525673   2.081613  \n",
       "...              ...        ...  \n",
       "3082194   122.628835  -0.529278  \n",
       "3082195   122.628835  -0.039997  \n",
       "3082196   122.628835  -0.676063  \n",
       "3082197   122.628835  -0.610825  \n",
       "3082198   122.628835  -0.610825  \n",
       "3082199   122.628835  -0.284638  \n",
       "3082200   122.628835  -0.284638  \n",
       "3082201   122.628835  -0.366185  \n",
       "3082202   122.628835  -0.692372  \n",
       "3082203   122.628835  -0.692372  \n",
       "3082204   122.628835  -0.284638  \n",
       "3082205   122.628835  -0.529278  \n",
       "3082206   122.628835  -0.366185  \n",
       "3082207   122.628835  -0.594516  \n",
       "3082208   122.628835  -0.121544  \n",
       "3082209   122.628835  -0.284638  \n",
       "3082210   122.628835  -0.447731  \n",
       "3082211   122.628835  -0.447731  \n",
       "3082212   122.628835  -0.039997  \n",
       "3082213   122.628835  -0.284638  \n",
       "3082214   122.628835  -0.284638  \n",
       "3082215   122.628835  -0.121544  \n",
       "3082216   122.628835  -0.447731  \n",
       "3082217   122.628835  -0.610825  \n",
       "3082218   122.628835  -0.529278  \n",
       "3082219   122.628835  -0.586361  \n",
       "3082220   122.628835  -0.610825  \n",
       "3082221   122.628835  -0.610825  \n",
       "3082222   122.628835  -0.488505  \n",
       "3082223     3.337186  -0.647165  \n",
       "\n",
       "[3082224 rows x 8 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/XXXX/miniconda3/envs/pytorch/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2728: DtypeWarning: Columns (6,9) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    }
   ],
   "source": [
    "lab_df=pd.read_csv(file_path+\"LAB_processed.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Anion Gap', 'Bicarbonate', 'Calcium, Total', 'Chloride',\n",
       "       'Creatinine', 'Glucose', 'Magnesium', 'Phosphate', 'Potassium',\n",
       "       'Sodium', 'Alkaline Phosphatase',\n",
       "       'Asparate Aminotransferase (AST)', 'Bilirubin, Total',\n",
       "       'Urea Nitrogen', 'Basophils', 'Eosinophils', 'Hematocrit',\n",
       "       'Hemoglobin', 'Lymphocytes', 'MCH', 'MCHC', 'MCV', 'Monocytes',\n",
       "       'Neutrophils', 'Platelet Count', 'RDW', 'Red Blood Cells',\n",
       "       'White Blood Cells', 'PTT', 'Base Excess', 'Calculated Total CO2',\n",
       "       'Lactate', 'pCO2', 'pH', 'pO2', 'PT',\n",
       "       'Alanine Aminotransferase (ALT)', 'Albumin', 'Specific Gravity'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lab_df[\"LABEL\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=pd.read_csv(outfile_path+\"LSTM_covariates_test.csv\")\n",
    "b=pd.read_csv(outfile_path+\"LSTM_tensor_test.csv\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2125"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(a.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2125"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b[\"UNIQUE_ID\"].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "96"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(merged_df[\"LABEL\"].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.05962794510522801"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(merged_df.index)/(96*merged_df[\"SUBJECT_ID\"].nunique()*48*2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>level_0</th>\n",
       "      <th>CHARTTIME</th>\n",
       "      <th>HADM_ID</th>\n",
       "      <th>LABEL</th>\n",
       "      <th>Origin</th>\n",
       "      <th>SUBJECT_ID</th>\n",
       "      <th>VALUENUM</th>\n",
       "      <th>index</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2133-02-05 06:29:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2133-02-05 06:59:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2133-02-05 07:29:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2133-02-05 07:59:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2133-02-05 08:29:00</td>\n",
       "      <td>139787.0</td>\n",
       "      <td>Potassium Chloride</td>\n",
       "      <td>Inputs</td>\n",
       "      <td>27063</td>\n",
       "      <td>1.354906</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   level_0           CHARTTIME   HADM_ID               LABEL  Origin  \\\n",
       "0        0 2133-02-05 06:29:00  139787.0  Potassium Chloride  Inputs   \n",
       "1        1 2133-02-05 06:59:00  139787.0  Potassium Chloride  Inputs   \n",
       "2        2 2133-02-05 07:29:00  139787.0  Potassium Chloride  Inputs   \n",
       "3        3 2133-02-05 07:59:00  139787.0  Potassium Chloride  Inputs   \n",
       "4        4 2133-02-05 08:29:00  139787.0  Potassium Chloride  Inputs   \n",
       "\n",
       "   SUBJECT_ID  VALUENUM  index  \n",
       "0       27063  1.354906    0.0  \n",
       "1       27063  1.354906    1.0  \n",
       "2       27063  1.354906    2.0  \n",
       "3       27063  1.354906    3.0  \n",
       "4       27063  1.354906    4.0  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['D5WDrug Drug', 'Docusate SodiumDrug Drug',\n",
       "       'Magnesium SulfateDrug Drug', 'Potassium ChlorideDrug Drug',\n",
       "       'PantoprazoleDrug Drug', 'BisacodylDrug Drug',\n",
       "       'Humulin-R InsulinDrug Drug', 'AspirinDrug Drug',\n",
       "       'Sodium Chloride 0.9%  FlushDrug Drug',\n",
       "       'Metoprolol TartrateDrug Drug'], dtype=object)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "presc_df[\"LABEL\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "96"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_df[\"LABEL\"].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
